cnn_feature_extractor
z-harry-sun committed Jul 21, 2015
1 parent df33b0b commit 16c7f73
Showing 11 changed files with 515 additions and 0 deletions.
Binary file added data/caffenet/cat.jpg
Binary file added data/caffenet/cat_gray.jpg
3 changes: 3 additions & 0 deletions data/caffenet/cnn.csv

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions data/caffenet/cnn.svm

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions data/caffenet/file_list.txt
@@ -0,0 +1,3 @@
/home/sun/prog/smqtk/src/data/caffenet/fish-bike.jpg 0
/home/sun/prog/smqtk/src/data/caffenet/cat_gray.jpg 0
/home/sun/prog/smqtk/src/data/caffenet/cat.jpg 0
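
(Each line of file_list.txt pairs an image path with an integer label; the labels here are dummy zeros, since only a forward pass is needed for feature extraction.)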
Binary file added data/caffenet/fish-bike.jpg
238 changes: 238 additions & 0 deletions data/caffenet/imagenet_val.prototxt
@@ -0,0 +1,238 @@
name: "CaffeNet"
layer {
name: "data"
type: "ImageData"
top: "data"
top: "label"
transform_param {
mirror: false
crop_size: 227
mean_file: "data/caffenet/imagenet_mean.binaryproto"
}
image_data_param {
source: "data/caffenet/file_list.txt"
batch_size: 1
new_height: 256
new_width: 256
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm1"
type: "LRN"
bottom: "pool1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "norm1"
top: "conv2"
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm2"
type: "LRN"
bottom: "pool2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "norm2"
top: "conv3"
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
inner_product_param {
num_output: 4096
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
inner_product_param {
num_output: 4096
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc8"
type: "InnerProduct"
bottom: "fc7"
top: "fc8"
inner_product_param {
num_output: 1000
}
}
layer {
name: "prob"
type: "Softmax"
bottom: "fc8"
top: "prob"
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "prob"
bottom: "label"
top: "accuracy"
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc8"
bottom: "label"
top: "loss"
}
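
For reference, the network definition above is consumed by the extractor roughly as follows. This is a minimal sketch against the Caffe C++ API of the time, mirroring the calls made in cnn_feature_extractor.cxx below; the function name extract_fc7_once is illustrative and error handling is omitted:

#include <vector>
#include "boost/shared_ptr.hpp"
#include "caffe/blob.hpp"
#include "caffe/net.hpp"

void extract_fc7_once()
{
  // Build the net from the prototxt above and load the pretrained weights.
  boost::shared_ptr<caffe::Net<float> > net(
      new caffe::Net<float>("data/caffenet/imagenet_val.prototxt", caffe::TEST));
  net->CopyTrainedLayersFrom("data/caffenet/bvlc_reference_caffenet.caffemodel");

  // The ImageData layer reads its own input, so no bottom blobs are passed in.
  std::vector<caffe::Blob<float>*> bottom;
  net->Forward(bottom);

  // fc7 holds one 4096-dimensional activation vector per image in the batch.
  const boost::shared_ptr<caffe::Blob<float> > fc7 = net->blob_by_name("fc7");
  const float* features = fc7->cpu_data();
  (void)features;  // hand off to the CSV/SVM/stdout writers as in the tool below
}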
8 changes: 8 additions & 0 deletions src/CMakeLists.txt
@@ -5,3 +5,11 @@ option(SMQTK_BUILD_FRAME_EXTRACTOR
if( SMQTK_BUILD_FRAME_EXTRACTOR )
add_subdirectory( frame_extractor )
endif()

option(SMQTK_BUILD_CNN_FEATURE_EXTRACTOR
  "Enable building of the deep-learning feature extractor. This requires Caffe."
  OFF
)
if( SMQTK_BUILD_CNN_FEATURE_EXTRACTOR )
add_subdirectory( cnn_feature_extractor )
endif()
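
With this option OFF by default, the tool is built only on request. A typical configure invocation might look like the following (paths are illustrative; it assumes Caffe was built with CMake so that find_package(Caffe) can locate its CaffeConfig.cmake via Caffe_DIR):

cmake -DSMQTK_BUILD_CNN_FEATURE_EXTRACTOR=ON -DCaffe_DIR=/path/to/caffe/build /path/to/smqtk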
25 changes: 25 additions & 0 deletions src/cnn_feature_extractor/CMakeLists.txt
@@ -0,0 +1,25 @@
cmake_minimum_required(VERSION 2.8)
project(smqtk_cnn_feature_extractor)

find_package(Caffe REQUIRED)
include_directories(SYSTEM ${Caffe_INCLUDE_DIRS})

set(cnn_feature_extractor_srcs
cnn_feature_extractor.cxx
)

add_executable(cnn_feature_extractor
${cnn_feature_extractor_srcs})
target_link_libraries(cnn_feature_extractor
${Caffe_LIBRARIES})

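# NOTE: ${flags} is not set in this file; it is presumably inherited from the
# enclosing SMQTK build and expands to nothing otherwise.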
set_property(
TARGET cnn_feature_extractor
PROPERTY
COMPILE_FLAGS "${flags}")

install(
TARGETS cnn_feature_extractor
DESTINATION bin
COMPONENT tools
)
18 changes: 18 additions & 0 deletions src/cnn_feature_extractor/README.txt
@@ -0,0 +1,18 @@

Using Caffe (http://caffe.berkeleyvision.org/), cnn_feature_extractor extracts a feature vector from each input image. It is assumed that Caffe has been built and that its header files and library (libcaffe.so) are available.

To run the program, first download two files
  bvlc_reference_caffenet.caffemodel
  imagenet_mean.binaryproto
from Caffe (available from the Caffe model zoo and Caffe's ilsvrc12 auxiliary-data download, respectively) and copy them to
  $SMQTK/data/caffenet

Then change to the source tree and run one of the following commands:
cd $SMQTK_SRC

cnn_feature_extractor data/caffenet/bvlc_reference_caffenet.caffemodel data/caffenet/imagenet_val.prototxt fc7 data/caffenet/cnn 3 csv

cnn_feature_extractor data/caffenet/bvlc_reference_caffenet.caffemodel data/caffenet/imagenet_val.prototxt fc7 data/caffenet/cnn 3 svm

cnn_feature_extractor data/caffenet/bvlc_reference_caffenet.caffemodel data/caffenet/imagenet_val.prototxt fc7 data/caffenet/cnn 3 stdout >& /dev/null

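Each command produces one output file per requested blob (here data/caffenet/cnn.csv or cnn.svm). The csv format writes one line per image containing the 4096 comma-separated fc7 values; the svm format writes sparse index:value pairs for the non-zero entries. As an illustration (read_csv_line is a hypothetical helper, not part of this commit), a cnn.csv row can be parsed back like so:

#include <cstdlib>
#include <istream>
#include <sstream>
#include <string>
#include <vector>

// Parse one comma-separated row of cnn.csv into a dense feature vector.
std::vector<float> read_csv_line(std::istream& is)
{
  std::vector<float> features;
  std::string line;
  std::string cell;
  if (std::getline(is, line))
  {
    std::stringstream row(line);
    while (std::getline(row, cell, ','))
    {
      features.push_back(static_cast<float>(std::atof(cell.c_str())));
    }
  }
  return features;
}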
217 changes: 217 additions & 0 deletions src/cnn_feature_extractor/cnn_feature_extractor.cxx
@@ -0,0 +1,217 @@
#include <string>
#include <vector>
#include <fstream>
#include <limits>
#include <cstdlib>  // atoi
#include <cstring>  // strcmp
#include <math.h>   // fabs

#include "boost/algorithm/string.hpp"
#include "boost/make_shared.hpp"
#include "google/protobuf/text_format.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"

using caffe::Blob;
using caffe::Caffe;
using caffe::Net;
using boost::shared_ptr;
using std::string;

template<typename Dtype>
int feature_extraction_pipeline(int argc, char** argv);

int main(int argc, char** argv)
{
return feature_extraction_pipeline<float>(argc, argv);
}

template<typename Dtype>
int feature_extraction_pipeline(int argc, char** argv)
{
::google::InitGoogleLogging(argv[0]);
const int num_required_args = 7;
if (argc < num_required_args)
{
LOG(ERROR)<<
"This program takes a trained network and an input data layer, and then"
" extracts features of the input data produced by the net. It writes the features to text files or stdout, instead of a database.\n"
"Usage: cnn_feature_extractor pretrained_net_param"
" feature_extraction_proto_file extract_feature_blob_name1[,name2,...]"
" save_feature_dataset_name1[,name2,...] num_mini_batches output_format[csv,svm,stdout]"
" [CPU/GPU] [DEVICE_ID=0]\n"
"Note: you can extract multiple features in one pass by specifying"
" multiple feature blob names and dataset names separated by ','."
" The names cannot contain whitespace characters and the number of blobs"
" and datasets must be equal.";
return 1;
}
int arg_pos = num_required_args;
if (argc > arg_pos && strcmp(argv[arg_pos], "GPU") == 0)
{
LOG(ERROR)<< "Using GPU";
unsigned int device_id = 0;
if (argc > arg_pos + 1)
{
device_id = atoi(argv[arg_pos + 1]);
CHECK_GE(device_id, 0);
}
LOG(ERROR) << "Using Device_id=" << device_id;
Caffe::SetDevice(device_id);
Caffe::set_mode(Caffe::GPU);
}
else
{
LOG(ERROR) << "Using CPU";
Caffe::set_mode(Caffe::CPU);
}

arg_pos = 0; // the name of the executable
std::string pretrained_binary_proto(argv[++arg_pos]);

// Expected prototxt contains at least one data layer such as
// the layer data_layer_name and one feature blob such as the
// fc7 top blob to extract features.
/*
layers {
name: "data_layer_name"
type: DATA
data_param {
source: "/path/to/your/images/to/extract/feature/images_leveldb"
mean_file: "/path/to/your/image_mean.binaryproto"
batch_size: 128
crop_size: 227
mirror: false
}
top: "data_blob_name"
top: "label_blob_name"
}
layers {
name: "drop7"
type: DROPOUT
dropout_param {
dropout_ratio: 0.5
}
bottom: "fc7"
top: "fc7"
}
*/
std::string feature_extraction_proto(argv[++arg_pos]);
shared_ptr<Net<Dtype> > feature_extraction_net(
new Net<Dtype>(feature_extraction_proto, caffe::TEST));
feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto);

std::string extract_feature_blob_names(argv[++arg_pos]);
std::vector<std::string> blob_names;
boost::split(blob_names, extract_feature_blob_names, boost::is_any_of(","));

std::string save_feature_dataset_names(argv[++arg_pos]);
std::vector<std::string> dataset_names;
boost::split(dataset_names, save_feature_dataset_names,
boost::is_any_of(","));
CHECK_EQ(blob_names.size(), dataset_names.size()) <<
" the number of blob names and dataset names must be equal";
size_t num_features = blob_names.size();

for (size_t i = 0; i < num_features; i++)
{
CHECK(feature_extraction_net->has_blob(blob_names[i]))
<< "Unknown feature blob name " << blob_names[i]
<< " in the network " << feature_extraction_proto;
}

int num_mini_batches = atoi(argv[++arg_pos]);
std::string out_format = std::string( argv[++arg_pos] );
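// num_mini_batches times the prototxt's batch_size determines how many images
// are processed; the README uses 3 mini-batches of size 1 for the 3 listed images.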

const std::string out_svm("svm");
const std::string out_csv("csv");
const std::string out_stdout("stdout");

std::vector< shared_ptr<std::ofstream> > feature_ofs;
bool write_to_file = (out_format.find( out_csv ) != string::npos ||
out_format.find( out_svm ) != string::npos );
if( write_to_file )
{
for (size_t i = 0; i < num_features; ++i)
{
std::string fname = dataset_names[i] + "." + out_format;
LOG(INFO)<< "Opening file " << fname ;
shared_ptr<std::ofstream> ofs = boost::make_shared<std::ofstream>(fname.c_str(), std::ofstream::out);
if( ofs->is_open() )
{
feature_ofs.push_back( ofs );
}
else
{
LOG(ERROR)<< "Cannot open file" << fname << std::endl;
exit(-1);
}
}
}

LOG(ERROR)<< "Extacting Features";

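// The ImageData layer supplies its own input, so the forward pass takes no
// bottom blobs; input_vec stays empty.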
std::vector<Blob<float>*> input_vec;
std::vector<int> image_indices(num_features, 0);
for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index)
{
feature_extraction_net->Forward(input_vec);
for (size_t i = 0; i < num_features; ++i)
{
const shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[i]);
int batch_size = feature_blob->num();
int dim_features = feature_blob->count() / batch_size;
const Dtype* feature_blob_data;
for (int n = 0; n < batch_size; ++n)
{
feature_blob_data = feature_blob->cpu_data() + feature_blob->offset(n);
for (int d = 0; d < dim_features; ++d)
{
if( (out_format.find( out_svm ) != std::string::npos) &&
(fabs(feature_blob_data[d]) > std::numeric_limits<float>::epsilon() ) )
{
// sparse text
*feature_ofs[i] << d << ":" << feature_blob_data[d] << " ";
}
else if( out_format.find( out_csv ) != std::string::npos )
{
// csv format
if(d !=0 )
*feature_ofs[i] << ",";
*feature_ofs[i] << feature_blob_data[d];
}
else if( out_format.find( out_stdout ) != string::npos )
{
// stdout
std::cout << feature_blob_data[d] << " ";
}
} // for (int d = 0; d < dim_features; ++d)

if( write_to_file )
*feature_ofs[i] << std::endl;
else
std::cout << std::endl;

++image_indices[i];
} // for (int n = 0; n < batch_size; ++n)
LOG(ERROR)<< "Extracted features of " << image_indices[i] <<
" query images for feature blob " << blob_names[i];
} // for (int i = 0; i < num_features; ++i)
} // for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index)

if( write_to_file )
{
for (size_t i = 0; i < feature_ofs.size(); ++i)
{
feature_ofs[i]->close();
}
}

LOG(ERROR)<< "Successfully extracted the features!";
return 0;
}
