diff --git a/2_finetuning-alexnet-wikiart-style/download_wikiart.py b/2_finetuning-alexnet-wikiart-style/download_wikiart.py
new file mode 100644
index 0000000..fc7d942
--- /dev/null
+++ b/2_finetuning-alexnet-wikiart-style/download_wikiart.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+"""
+Download WikiArt images and write
+Caffe ImageData layer files
+
+This script has been forked from
+https://github.com/BVLC/caffe/blob/master/examples/finetune_flickr_style/assemble_data.py
+"""
+import os
+import urllib
+import argparse
+import numpy as np
+import pandas as pd
+from skimage import io
+import multiprocessing
+
+root_dirname = os.path.abspath(os.path.dirname(__file__))
+training_dirname = os.path.join(root_dirname, 'data/wikiart')
+
+def download_image(args_tuple):
+    "For use with multiprocessing map. Returns True on success, False on failure."
+    try:
+        url, filename = args_tuple
+        if not os.path.exists(filename):
+            urllib.urlretrieve(url, filename)
+        test_read_image = io.imread(filename)  # raises if the file is corrupt or unreadable
+        return True
+    except KeyboardInterrupt:
+        raise Exception()  # multiprocessing doesn't catch keyboard exceptions
+    except:
+        return False
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Download a subset of the WikiArt style dataset to a directory.')
+    parser.add_argument(
+        '-s', '--seed', type=int, default=0,
+        help="random seed")
+    parser.add_argument(
+        '-i', '--images', type=int, default=-1,
+        help="number of images to use (-1 for all [default])",
+    )
+    parser.add_argument(
+        '-w', '--workers', type=int, default=-1,
+        help="number of workers used to download images; -x uses (all cores - x) [-1 default]."
+    )
+
+    args = parser.parse_args()
+    np.random.seed(args.seed)
+
+    # Read data, shuffle order, and subsample.
+    csv_filename = os.path.join(root_dirname, 'wikiart.csv.gz')
+    df = pd.read_csv(csv_filename, index_col=0, compression='gzip')
+    df = df.iloc[np.random.permutation(df.shape[0])]
+    if args.images > 0 and args.images < df.shape[0]:
+        df = df.iloc[:args.images]
+
+    # Make directory for images and get local filenames.
+    images_dirname = os.path.join(training_dirname, 'images')
+    if not os.path.exists(images_dirname):
+        os.makedirs(images_dirname)
+    df['image_filename'] = [
+        os.path.join(images_dirname, _ + '.jpg') for _ in df.index.values
+    ]
+
+    # Download images.
+    num_workers = args.workers
+    if num_workers <= 0:
+        num_workers = multiprocessing.cpu_count() + num_workers
+    print('Downloading {} images with {} workers...'.format(
+        df.shape[0], num_workers))
+    pool = multiprocessing.Pool(processes=num_workers)
+    map_args = zip(df['image_url'], df['image_filename'])
+    results = pool.map(download_image, map_args)
+
+    # Only keep rows with valid images, and write out training file lists.
+ df = df[results] + for split in ['train', 'test']: + split_df = df[df['_split'] == split] + filename = os.path.join(training_dirname, '{}.txt'.format(split)) + split_df[['image_filename', 'label']].to_csv( + filename, sep=' ', header=None, index=None) + print('Writing train/val for {} successfully downloaded images.'.format( + df.shape[0])) \ No newline at end of file diff --git a/2_finetuning-alexnet-wikiart-style/styles.csv b/2_finetuning-alexnet-wikiart-style/styles.csv new file mode 100644 index 0000000..2e0ff38 --- /dev/null +++ b/2_finetuning-alexnet-wikiart-style/styles.csv @@ -0,0 +1,11 @@ +label,style +0,Art Nouveau (Modern) +1,Baroque +2,Expressionism +3,Impressionism +4,Neoclassicism +5,Post-Impressionism +6,Realism +7,Romanticism +8,Surrealism +9,Symbolism \ No newline at end of file diff --git a/2_finetuning-alexnet-wikiart-style/wikiart.csv.gz b/2_finetuning-alexnet-wikiart-style/wikiart.csv.gz new file mode 100644 index 0000000..316b608 Binary files /dev/null and b/2_finetuning-alexnet-wikiart-style/wikiart.csv.gz differ diff --git a/3_visualizing-breaking-convnets/breaking-convnets.ipynb b/3_visualizing-breaking-convnets/breaking-convnets.ipynb new file mode 100644 index 0000000..a4a6fde --- /dev/null +++ b/3_visualizing-breaking-convnets/breaking-convnets.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Setup\n", + "import os\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.cm as cm\n", + "%matplotlib inline\n", + "\n", + "# Make sure that caffe is on the python path\n", + "caffe_root = ''\n", + "import sys\n", + "sys.path.insert(0, caffe_root + 'python')\n", + "\n", + "import caffe\n", + "\n", + "plt.rcParams['figure.figsize'] = (10, 10)\n", + "plt.rcParams['image.interpolation'] = 'nearest'\n", + "plt.rcParams['image.cmap'] = 'gray'\n", + "\n", + "model_prototxt = 'deploy.prototxt'\n", + "pretrained_model = os.path.join(caffe_root,'models/bvlc_alexnet/bvlc_alexnet.caffemodel')\n", + "\n", + "caffe.set_mode_cpu()\n", + "net = caffe.Classifier(model_prototxt, pretrained_model,\n", + " mean=np.load(os.path.join(caffe_root,'python/caffe/imagenet/ilsvrc_2012_mean.npy')).mean(1).mean(1),\n", + " channel_swap=(2,1,0),\n", + " raw_scale=255,\n", + " image_dims=(256, 256))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start with the image of a cat and desired label as goose. We update input image for `n_iterations` iterations till it maximises class score for goose." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "image_file = 'cat_2.jpg'\n", + "input_image = caffe.io.load_image(image_file)\n", + "\n", + "n_iterations = 40\n", + "desired_label = 99 # goose\n", + "label = np.zeros((1,1,1,1000))\n", + "label[0,0,0,desired_label] = 1;\n", + "step_size = 1000\n", + "reg = 0.002\n", + "\n", + "input_image = net.transformer.preprocess('data',input_image)\n", + "fooling_image = input_image[:]\n", + "zero_image = np.zeros(fooling_image.shape)\n", + "\n", + "for i in range(n_iterations):\n", + " net.blobs['data'].data[...] 
= fooling_image\n", + " \n", + " # Perform forward pass\n", + " # TODO\n", + " # \n", + " # END OF YOUR CODE\n", + " \n", + " # Perform backward pass for the desired class\n", + " # TODO\n", + " # \n", + " # END OF YOUR CODE\n", + " \n", + " # Compute gradient and incremental update\n", + " # Store update value in di\n", + " # TODO\n", + " # \n", + " # di = \n", + " # END OF YOUR CODE\n", + " \n", + " fooling_image += di\n", + " zero_image += di\n", + " \n", + "plt.subplot(1,2,1)\n", + "plt.imshow(net.transformer.deprocess('data', zero_image))\n", + "plt.subplot(1,2,2)\n", + "plt.imshow(net.transformer.deprocess('data', fooling_image))\n", + "\n", + "# Save the image\n", + "plt.imsave('cat_fooled.jpg',net.transformer.deprocess('data', fooling_image))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we check our prediction on the modified image to make sure it is able to 'fool' the ConvNet." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['n01855672 goose' 'n01806567 quail' 'n01847000 drake'\n", + " 'n02018207 American coot, marsh hen, mud hen, water hen, Fulica americana'\n", + " 'n01592084 chickadee']\n" + ] + } + ], + "source": [ + "image_file = 'cat_fooled.jpg'\n", + "net.blobs['data'].data[...] = net.transformer.preprocess('data',caffe.io.load_image(image_file))\n", + "prediction = net.forward()\n", + "# load labels\n", + "imagenet_labels_filename = os.path.join(caffe_root,'data/ilsvrc12/synset_words.txt')\n", + "try:\n", + " labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\\t')\n", + "except:\n", + " !../data/ilsvrc12/get_ilsvrc_aux.sh\n", + " labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\\t')\n", + "# sort top k predictions from softmax output\n", + "top_k = prediction['fc8'][0].flatten().argsort()[-1:-6:-1]\n", + "print labels[top_k]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/3_visualizing-breaking-convnets/cat.jpg b/3_visualizing-breaking-convnets/cat.jpg new file mode 100644 index 0000000..86dff4c Binary files /dev/null and b/3_visualizing-breaking-convnets/cat.jpg differ diff --git a/3_visualizing-breaking-convnets/cat_2.jpg b/3_visualizing-breaking-convnets/cat_2.jpg new file mode 100644 index 0000000..c2e658b Binary files /dev/null and b/3_visualizing-breaking-convnets/cat_2.jpg differ diff --git a/3_visualizing-breaking-convnets/class-model-visualizations.ipynb b/3_visualizing-breaking-convnets/class-model-visualizations.ipynb new file mode 100644 index 0000000..7ba1996 --- /dev/null +++ b/3_visualizing-breaking-convnets/class-model-visualizations.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Setup\n", + "import os\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.cm as cm\n", + "%matplotlib inline\n", + "\n", + "# Make sure that caffe is on the python path\n", + "caffe_root = ''\n", + "import sys\n", + "sys.path.insert(0, 
caffe_root + 'python')\n", + "\n", + "import caffe\n", + "\n", + "plt.rcParams['figure.figsize'] = (10, 10)\n", + "plt.rcParams['image.interpolation'] = 'nearest'\n", + "plt.rcParams['image.cmap'] = 'gray'\n", + "\n", + "model_prototxt = 'deploy.prototxt'\n", + "pretrained_model = os.path.join(caffe_root,'models/bvlc_alexnet/bvlc_alexnet.caffemodel')\n", + "\n", + "caffe.set_mode_cpu()\n", + "net = caffe.Classifier(model_prototxt, pretrained_model,\n", + " mean=np.load(os.path.join(caffe_root,'python/caffe/imagenet/ilsvrc_2012_mean.npy')).mean(1).mean(1),\n", + " channel_swap=(2,1,0),\n", + " raw_scale=255,\n", + " image_dims=(256, 256))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sanity check just to see if everything is set up properly." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted class is #285.\n", + "['n02124075 Egyptian cat' 'n02123045 tabby, tabby cat'\n", + " 'n02123159 tiger cat' 'n02127052 lynx, catamount'\n", + " 'n02120505 grey fox, gray fox, Urocyon cinereoargenteus']\n" + ] + } + ], + "source": [ + "image_file = 'cat.jpg'\n", + "input_image = caffe.io.load_image(image_file)\n", + "prediction = net.predict([input_image])\n", + "print(\"Predicted class is #{}.\".format(prediction[0].argmax()))\n", + "\n", + "# load labels\n", + "imagenet_labels_filename = os.path.join(caffe_root,'data/ilsvrc12/synset_words.txt')\n", + "try:\n", + " labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\\t')\n", + "except:\n", + " !../data/ilsvrc12/get_ilsvrc_aux.sh\n", + " labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\\t')\n", + "\n", + "# sort top k predictions from softmax output\n", + "top_k = prediction[0].flatten().argsort()[-1:-6:-1]\n", + "print labels[top_k]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we set the number of iterations, desired label, learning rate and the randomly generated image that we start with." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 250\n", + "input_data = np.random.random((1,3,227,227))\n", + "label_index = 281 # cat. 99: goose, 285: cat, 543: dumbbell\n", + "label = np.zeros((1,1,1,1000))\n", + "label[0,0,0,label_index] = 1;\n", + "learning_rate = 10000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Iteratively perform gradient ascent over input image space to generate visualization." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for i in range(n_iterations):\n", + " # Perform forward pass\n", + " # TODO\n", + " # \n", + " # END OF YOUR CODE\n", + " \n", + " # Perform backward pass for the desired class\n", + " bw = net.backward(**{net.outputs[0]: label})\n", + " \n", + " # Perform gradient ascent over the input image\n", + " # TODO\n", + " # \n", + " #\n", + " # END OF YOUR CODE\n", + " \n", + " if i%20 == 0:\n", + " print(\"Iteration #{}.\".format(i))\n", + "\n", + "print 'Done'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Normalize and view the class model visualization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "data = input_data[0].transpose(1,2,0)\n", + "data -= data.min()\n", + "data /= data.max()\n", + "plt.imshow(data)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/3_visualizing-breaking-convnets/images/eq1.png b/3_visualizing-breaking-convnets/images/eq1.png new file mode 100644 index 0000000..b30fa72 Binary files /dev/null and b/3_visualizing-breaking-convnets/images/eq1.png differ diff --git a/3_visualizing-breaking-convnets/images/eq2.png b/3_visualizing-breaking-convnets/images/eq2.png new file mode 100644 index 0000000..142732d Binary files /dev/null and b/3_visualizing-breaking-convnets/images/eq2.png differ diff --git a/3_visualizing-breaking-convnets/saliency-maps.ipynb b/3_visualizing-breaking-convnets/saliency-maps.ipynb new file mode 100644 index 0000000..047dde0 --- /dev/null +++ b/3_visualizing-breaking-convnets/saliency-maps.ipynb @@ -0,0 +1,98 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Setup\n", + "import os\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.cm as cm\n", + "%matplotlib inline\n", + "\n", + "# Make sure that caffe is on the python path\n", + "caffe_root = ''\n", + "import sys\n", + "sys.path.insert(0, caffe_root + 'python')\n", + "\n", + "import caffe\n", + "\n", + "plt.rcParams['figure.figsize'] = (10, 10)\n", + "plt.rcParams['image.interpolation'] = 'nearest'\n", + "plt.rcParams['image.cmap'] = 'gray'\n", + "\n", + "model_prototxt = 'deploy.prototxt'\n", + "pretrained_model = os.path.join(caffe_root,'models/bvlc_alexnet/bvlc_alexnet.caffemodel')\n", + "\n", + "caffe.set_mode_cpu()\n", + "net = caffe.Classifier(model_prototxt, pretrained_model,\n", + " mean=np.load(os.path.join(caffe_root,'python/caffe/imagenet/ilsvrc_2012_mean.npy')).mean(1).mean(1),\n", + " channel_swap=(2,1,0),\n", + " raw_scale=255,\n", + " image_dims=(256, 256))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "image_file = 'cat.jpg'\n", + "input_image = caffe.io.load_image(image_file)\n", + "\n", + "label_index = 281 # 99: goose, 285: cat, 543: dumbbell\n", + "input_data = np.zeros((1,3,227,227))\n", + "label = np.zeros((1,1,1,1000))\n", + "label[0,0,0,label_index] = 1\n", + "learning_rate = 1000\n", + "\n", + "prediction = net.predict([input_image])\n", + "\n", + "# Compute gradient of class score with respect to input image.\n", + "# Store visualization matrix in \"saliency_map\"\n", + "# TODO\n", + "# \n", + "# \n", + "# \n", + "# \n", + "# \n", + "# \n", + "# END OF YOUR CODE\n", + "\n", + "plt.subplot(1,2,1)\n", + "plt.imshow(saliency_map)\n", + "plt.subplot(1,2,2)\n", + "plt.imshow(net.transformer.deprocess('data', net.blobs['data'].data[0]))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..da4757e --- /dev/null +++ b/README.md @@ -0,0 +1,268 @@ +# [ECE 6504 Deep Learning for Perception][1] + +## Homework 2 + +In this homework, we continue learning [Caffe][2], and implement dropout and +data augmentation in our earlier ConvNet. We then fine-tune a pre-trained model, +AlexNet, for style classification on the WikiArt dataset. Finally, we visualize +data gradients and learn to generate images to fool a pre-trained ConvNet. + +Download the starter code [here](https://github.com/batra-mlp-lab/VT-F15-ECE6504-HW2/archive/1.0.zip). + +### Q1: Dropout and Data Augmentation (15 points) + +In this exercise, we'll be working with the same two-layer ConvNet +we trained on the CIFAR-10 dataset in the previous assignment and +implementing two ways to reduce overfitting - dropout and data +augmentation, using Caffe. + +Go through the specification of the [`DropoutLayer`][6] and +read network prototxt files of [AlexNet][9] & [CaffeNet][10] to see +how dropout layers are implemented in Caffe. + +There is in-built support for simple data augmentations such +as random crops and mirroring in Caffe. This is defined by the +`transform_param` parameter inside a `DataLayer` definition. + +``` +layer { + name: "data" + type: "Data" + [...] + transform_param { + scale: 0.1 + mean_file_size: mean.binaryproto + # for images in particular horizontal mirroring and random cropping + # can be done as simple data augmentations. + mirror: 1 # 1 = on, 0 = off + # crop a `crop_size` x `crop_size` patch: + # - at random during training + # - from the center during testing + crop_size: 227 + } +} +``` + +- Use a smaller training set, so that the network overfits (high training accuracy, low validation accuracy) +- Define a dropout layer +- Add data augmentation parameters to the Data layer +- Train the network again on the smaller set. You should see higher validation accuracy + +**Optional**: Other common data augmentation techniques used to +improve accuracy are rotations, shearing & perspective +wrapping. Take a look at the [ChenglongChen/caffe-rta][8] +repository to see how the author has implemented these. + +**Deliverables** + +- Network prototxt with dropout and data augmentation (5 points) +- `Validation Loss v/s Iterations` plot with and without dropout and data augmentation (10 points) + + +### Q2: Fine-tuning AlexNet for Style classification on WikiArt data (20 points) + +Given the WikiArt dataset, which consists of 10000 images of paintings +of arbitrary sizes from 10 different styles - Baroque, Realism, +Expressionism, etc., the goal is to fine-tune a pretrained model, AlexNet, to +predict painting style with reasonable performance and minimal training time. + +#### Obtaining the dataset + +The dataset consists of 10000 images in total from 10 different styles +of painting - 1000 images each. Use the `download_wikiart.py` script +to download a subset of the data and split it into training and +validation sets. + +```bash +% python download_wikiart.py -h +usage: download_wikiart.py [-h] [-s SEED] [-i IMAGES] [-w WORKERS] + +Download a subset of the WikiArt style dataset to a directory. 
+**Optional**: Other common data augmentation techniques used to
+improve accuracy are rotations, shearing & perspective
+warping. Take a look at the [ChenglongChen/caffe-rta][8]
+repository to see how the author has implemented these.
+
+**Deliverables**
+
+- Network prototxt with dropout and data augmentation (5 points)
+- `Validation Loss vs. Iterations` plot with and without dropout and data augmentation (10 points)
+
+
+### Q2: Fine-tuning AlexNet for Style classification on WikiArt data (20 points)
+
+Given the WikiArt dataset, which consists of 10000 images of paintings
+of arbitrary sizes from 10 different styles (Baroque, Realism,
+Expressionism, etc.), the goal is to fine-tune a pretrained model, AlexNet, to
+predict painting style with reasonable performance and minimal training time.
+
+#### Obtaining the dataset
+
+The dataset consists of 10000 images in total from 10 different styles
+of painting, with 1000 images per style. Use the `download_wikiart.py` script
+to download a subset of the data and split it into training and
+validation sets.
+
+```bash
+% python download_wikiart.py -h
+usage: download_wikiart.py [-h] [-s SEED] [-i IMAGES] [-w WORKERS]
+
+Download a subset of the WikiArt style dataset to a directory.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -s SEED, --seed SEED  random seed
+  -i IMAGES, --images IMAGES
+                        number of images to use (-1 for all [default])
+  -w WORKERS, --workers WORKERS
+                        num workers used to download images. -x uses (all - x)
+                        cores [-1 default].
+
+% python download_wikiart.py -i 2000 -s 761218
+Downloading 2000 images with 7 workers...
+Writing train/val for 1996 successfully downloaded images.
+```
+
+#### Setting up the AlexNet prototxt files
+
+Copy the AlexNet prototxt files, `solver.prototxt` and `train_val.prototxt`, from
+`$CAFFE_ROOT/models/bvlc_alexnet` to the working directory.
+
+```bash
+cp $CAFFE_ROOT/models/bvlc_alexnet/solver.prototxt ./
+cp $CAFFE_ROOT/models/bvlc_alexnet/train_val.prototxt ./
+```
+
+Since you'll be fine-tuning a network pretrained on the ImageNet dataset,
+you will also need the ImageNet mean file; run
+`$CAFFE_ROOT/data/ilsvrc12/get_ilsvrc_aux.sh` to obtain it. (If you were
+instead training a network from scratch, you would compute the mean over
+your own training data.) You will also need the AlexNet pretrained model.
+
+```bash
+python $CAFFE_ROOT/scripts/download_model_binary.py $CAFFE_ROOT/models/bvlc_alexnet
+```
+
+#### Transfer Learning
+
+There are two main transfer learning scenarios:
+
+- **ConvNet as a fixed feature extractor**: We take a ConvNet pretrained
+on the ImageNet dataset, remove the final fully-connected layer and treat
+the rest of the ConvNet as a fixed feature extractor for the new dataset.
+We can then train a linear classifier (linear SVM or SoftMax classifier) on these
+extracted features (4096-D vectors for every image in the case of AlexNet) for
+the new dataset. In Caffe, this is achieved by setting the learning rates
+of the intermediate layers (`blobs_lr`) to 0.
+
+- **Fine-tuning the ConvNet**: The second strategy is to not only replace
+and retrain the classifier on top of the ConvNet on the new dataset,
+but to also fine-tune the weights of the pretrained network by continuing
+the backpropagation.
+
+#### Fine-tuning
+
+Look at `train_val.prototxt` and `solver.prototxt` closely. To fine-tune on the
+WikiArt dataset, we'll start with the weights of the pretrained model for
+all layers. Since our dataset consists of 10 classes instead of 1000
+(for ImageNet), we'll modify the last layer. Note that when Caffe starts
+training from a pretrained model, the weights of layers whose names match
+are copied over, while renamed or new layers are initialized with random
+weights.
+
+From the Caffe example on [fine-tuning CaffeNet for style recognition on
+Flickr style data][3]:
+
+*We will also decrease the overall learning rate `base_lr` in the solver prototxt,
+but boost the `blobs_lr` on the newly introduced layer. The idea is to have the
+rest of the model change very slowly with new data, but let the new layer learn fast.
+Additionally, we set `stepsize` in the solver to a lower value than if we were training
+from scratch, since we're virtually far along in training and therefore want the
+learning rate to go down faster. Note that we could also entirely prevent fine-tuning
+of all layers other than `fc8_flickr` by setting their `blobs_lr` to 0.*
+
+In short (a sanity-check sketch follows this list):
+
+- Change the data layer to read your WikiArt train/val file lists
+- Change the last layer, renaming it and setting its number of outputs to 10
+- Modify the solver hyperparameters as described above
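+Before launching a long training run, it's worth checking from Python that the
+weight copying behaves as expected. A minimal sketch, assuming you renamed the
+final layer to `fc8_wikiart` with `num_output: 10` (the new layer name is your
+choice, not fixed by the assignment):
+
+```python
+import caffe
+
+caffe.set_mode_cpu()
+# Load the modified net and initialize it from the pretrained weights.
+net = caffe.Net('train_val.prototxt',
+                'models/bvlc_alexnet/bvlc_alexnet.caffemodel',  # adjust path
+                caffe.TRAIN)
+
+# Layers whose names match the pretrained model keep its weights; the renamed
+# classifier layer is freshly (randomly) initialized.
+for name, params in net.params.items():
+    print(name, params[0].data.shape)
+# Expect fc8_wikiart to have weight shape (10, 4096), while fc6/fc7 keep
+# AlexNet's original shapes.
+```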
+Now you can start training.
+
+```bash
+$CAFFE_ROOT/build/tools/caffe train -solver solver.prototxt -weights $CAFFE_ROOT/models/bvlc_alexnet/bvlc_alexnet.caffemodel
+```
+
+**Deliverables**
+
+- Prototxt files (`train_val`, `solver`, `deploy`) (10 points)
+- `Training Loss vs. Iterations` plot (5 points)
+- [Kaggle contest][15] (5 points + up to 10 extra points for beating the TA entry and top performers)
+
+### Q3: Visualizing and Breaking ConvNets (15 points)
+
+In this exercise, we'll work with the Python interface for Caffe and learn to
+visualize data gradients and generate images that fool ConvNets.
+
+#### Class Model Visualizations
+
+We'll be using the method outlined in the paper "Deep Inside Convolutional
+Networks: Visualising Image Classification Models and Saliency Maps" \[[3][11]\]
+to visualize a class model learnt by a convolutional neural network.
+
+In order to generate the class model visualization, we need to optimize the
+unnormalized class score with respect to the image:
+
+$$
+\mathop{\arg\max}\limits_I \; S_c(I) - \lambda \lVert I \rVert^2
+$$
+
+This is done by standard backpropagation, as in the training phase of the
+network, with the difference that instead of updating the network
+parameters, we update the image to maximize the score, a method known as
+**gradient ascent**. Also note that we'll drop the final layer of the
+network and maximize the unnormalized class score instead of the
+probability, as outlined in the paper.
+
+Copy the AlexNet `deploy.prototxt` into the working directory and edit it.
+
+```bash
+cp $CAFFE_ROOT/models/bvlc_alexnet/deploy.prototxt 3_visualizing-breaking-convnets/
+```
+
+- Delete the final layer
+- Add `force_backward: true` to propagate the gradients back to the data layer
+in the backward pass
+- Change the batch size in the input dimensions to 1
+
+Open the IPython notebook `class-model-visualizations.ipynb` and complete
+the missing code to generate the class model visualizations.
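+For reference, the core of the update loop has roughly the shape sketched
+below, using `net`, `input_data`, `label` and `learning_rate` as set up in the
+notebook; the regularization weight `reg` is an assumed value, and this is a
+sketch of the idea rather than the required solution:
+
+```python
+reg = 1e-5  # assumed weight for the lambda * ||I||^2 term
+
+for i in range(n_iterations):
+    # Forward: feed the current image and compute the class scores.
+    net.blobs['data'].data[...] = input_data
+    net.forward()
+
+    # Backward from the unnormalized class scores; with the softmax layer
+    # deleted, the net's only output is the final score blob.
+    bw = net.backward(**{net.outputs[0]: label})
+    gradient = bw['data']
+
+    # Gradient ascent step on S_c(I) - reg * ||I||^2
+    # (the gradient of the penalty term is -2 * reg * I).
+    input_data += learning_rate * (gradient - 2 * reg * input_data)
+```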
+#### Image-Specific Class Saliency Visualisation
+
+Section 3 of the paper \[[3][11]\] describes a method to understand which parts
+of an image are important for classification, by visualizing the gradient
+of the correct class score with respect to the input image. The core idea
+behind this is to find the pixels that need to be changed the least to
+affect the class score the most.
+
+Open the IPython notebook `saliency-maps.ipynb` and complete the missing code
+to extract and visualize image-specific saliency maps.
+
+#### Generating Fooling Images to Break ConvNets
+
+Several papers \[[4][12],[5][13],[6][14]\] have suggested ways to perform optimization over the
+input image to construct images that break a trained ConvNet. These papers showed
+that given a trained ConvNet, an input image, and a desired label, we can
+add a small amount of noise to the input image to force the ConvNet to classify
+it as having the desired label.
+
+We will create a fooling image by solving the following optimization problem:
+
+$$
+x_f = \mathop{\arg\min}\limits_x \left( L(x, y, m) + \frac{\lambda}{2} \lVert x - x_0 \rVert^2 \right)
+$$
+
+Open the IPython notebook `breaking-convnets.ipynb` and complete the missing code
+to generate fooling images that break pretrained ConvNets.
+
+**Deliverables**
+
+- Completed IPython notebooks `class-model-visualizations.ipynb`,
+`saliency-maps.ipynb` & `breaking-convnets.ipynb` (5 points x 3)
+
+References:
+
+1. [Assignment 3, CS231n, Stanford][5]
+2. [Fine-tuning CaffeNet for Style Recognition on “Flickr Style” Data][3]
+3. [Simonyan et al., "Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps", ICLR 2014][11]
+4. [Nguyen et al., "Deep Neural Networks are Easily Fooled: High Confidence Predictions for Unrecognizable Images", CVPR 2015][12]
+5. [Szegedy et al., "Intriguing properties of neural networks", ICLR 2014][13]
+6. [Goodfellow et al., "Explaining and Harnessing Adversarial Examples", ICLR 2015][14]
+
+[1]: https://computing.ece.vt.edu/~f15ece6504/
+[2]: http://caffe.berkeleyvision.org/
+[3]: http://caffe.berkeleyvision.org/gathered/examples/finetune_flickr_style.html
+[4]: http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel
+[5]: http://cs231n.github.io/assignment3/
+[6]: http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1DropoutLayer.html
+[7]: http://caffe.berkeleyvision.org/tutorial/data.html
+[8]: https://github.com/ChenglongChen/caffe-windows
+[9]: https://github.com/BVLC/caffe/blob/master/models/bvlc_alexnet/deploy.prototxt
+[10]: https://github.com/BVLC/caffe/blob/master/models/bvlc_reference_caffenet/deploy.prototxt
+[11]: http://arxiv.org/abs/1312.6034
+[12]: http://arxiv.org/abs/1412.1897
+[13]: http://arxiv.org/abs/1312.6199
+[14]: http://arxiv.org/abs/1412.6572
+[15]: https://inclass.kaggle.com/c/2015-fall-vt-ece-deep-learning-hw2
+
+---
+
+© 2015 Virginia Tech
\ No newline at end of file