diff --git a/.gitignore b/.gitignore index 5761abc..50e7488 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ *.o +.idea/ +build/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..8d989cd --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "include/easylogging++"] + path = include/easylogging++ + url = https://github.com/easylogging/easyloggingpp +[submodule "include/CLUE"] + path = include/CLUE + url = https://github.com/lindahua/CLUE diff --git a/CMakeLists.txt b/CMakeLists.txt index 92217d2..a3d0241 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,49 @@ cmake_minimum_required(VERSION 2.8) project( denseFlow ) + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") + +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11) +CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X) +if(COMPILER_SUPPORTS_CXX11) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fPIC") +elseif(COMPILER_SUPPORTS_CXX0X) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") +else() + message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.") +endif() + + find_package( OpenCV REQUIRED ) -include_directories( ${OpenCV_INCLUDE_DIRS} ) -add_executable( denseFlow_gpu denseFlow_gpu.cpp ) -target_link_libraries( denseFlow_gpu ${OpenCV_LIBS} ) \ No newline at end of file +find_package( LibZip REQUIRED ) + +# BOOST +FIND_PACKAGE(Boost REQUIRED python) +FIND_PACKAGE(PythonLibs REQUIRED) + +if(LIBZIP_VERSION VERSION_LESS 0.11) + #old version LibZip + add_definitions(-DUSE_OBSEL_LIBZIP) +endif() + +include_directories( ${OpenCV_INCLUDE_DIRS} ${LIBZIP_INCLUDE_DIR_ZIP} ${LIBZIP_INCLUDE_DIR_ZIPCONF} include/ include/easylogging++/src include/CLUE/include) +include_directories(SYSTEM ${Boost_INCLUDE_DIR} ${PYTHON_INCLUDE_DIR}) + +add_library(denseflow src/common.cpp src/dense_flow.cpp src/dense_flow_gpu.cpp src/dense_warp_flow_gpu.cpp src/zip_utils.cpp) +target_link_libraries( denseflow ${OpenCV_LIBS} ${LIBZIP_LIBRARY}) + +add_library(pydenseflow SHARED src/py_denseflow.cpp) +target_link_libraries(pydenseflow + denseflow + ${Boost_LIBRARIES} ${PYTHON_LIBRARIES} ${OpenCV_LIBS} + ) + +add_executable( extract_cpu tools/extract_flow.cpp) +target_link_libraries( extract_cpu ${OpenCV_LIBS} ${LIBZIP_LIBRARY} denseflow) + +add_executable( extract_gpu tools/extract_flow_gpu.cpp) +target_link_libraries( extract_gpu ${OpenCV_LIBS} ${LIBZIP_LIBRARY} denseflow) + +add_executable( extract_warp_gpu tools/extract_warp_flow_gpu.cpp) +target_link_libraries( extract_warp_gpu ${OpenCV_LIBS} ${LIBZIP_LIBRARY} denseflow) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..49c9b11 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2016 Multimedia Laboratory, The Chinese University of Hong Kong. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README b/README
deleted file mode 100644
index 8372cfd..0000000
--- a/README
+++ /dev/null
@@ -1,6 +0,0 @@
-This is a sample code for extrating dense flow field given a video.
-
-Usage:
-./denseFlow_gpu -f test.avi -x tmp/flow_x -y tmp/flow_x -i tmp/image -b 20 -t 1 -d 0 -s 1
-test.avi: input video
-tmp: folder containing RGB images and optical flow images
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..79032c5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,50 @@
+**Please Note**: This repository is no longer maintained. For extracting optical flow from videos, please use [this latest tool](https://github.com/open-mmlab/denseflow) from Open-MMLAB.
+----
+
+
+Extract dense optical flow fields from a video.
+
+#### Dependencies:
+- LibZip:
+to install on Ubuntu: ```apt-get install libzip-dev```; on macOS: ```brew install libzip```
+
+#### For OpenCV 3 Users
+Please see the [opencv-3.1](https://github.com/yjxiong/dense_flow/tree/opencv-3.1) branch. Many thanks to @victorhcm for the contributions!
+
+### Install
+```
+git clone --recursive http://github.com/yjxiong/dense_flow
+mkdir build && cd build
+cmake .. && make -j
+```
+
+### Usage
+```
+./extract_gpu -f=test.avi -x=tmp/flow_x -y=tmp/flow_y -i=tmp/image -b=20 -t=1 -d=0 -s=1 -o=dir
+```
+- `test.avi`: the input video
+- `tmp`: folder that receives the RGB images and optical flow images
+- `dir`: write the generated images to a folder; if set to `zip`, the images are written into zip archives instead (see the decoding sketch below)
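The saved flow images are 8-bit JPEGs: `convertFlowToImage` linearly maps each flow component from `[-bound, bound]` to `[0, 255]` and clips values outside that range, and `writeImages` names the files `<prefix>_00001.jpg`, `<prefix>_00002.jpg`, and so on. A minimal sketch of recovering approximate flow values from the folder output, assuming the command above (prefixes `tmp/flow_x` / `tmp/flow_y`, `-b=20`; the exact paths are illustrative):

```
# Sketch: invert the [-bound, bound] -> [0, 255] mapping used by convertFlowToImage.
# Values are quantized and clipped, so the recovered flow is only approximate.
import cv2
import numpy as np

bound = 20  # must match the -b argument used at extraction time
img_x = cv2.imread('tmp/flow_x_00001.jpg', cv2.IMREAD_GRAYSCALE)
img_y = cv2.imread('tmp/flow_y_00001.jpg', cv2.IMREAD_GRAYSCALE)

flow_x = img_x.astype(np.float32) / 255.0 * (2 * bound) - bound
flow_y = img_y.astype(np.float32) / 255.0 * (2 * bound) - bound
print(flow_x.min(), flow_x.max())
```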
+
+### Warp Flow
+The warped optical flow was used in the following paper:
+
+```
+@inproceedings{TSN2016ECCV,
+  author    = {Limin Wang and
+               Yuanjun Xiong and
+               Zhe Wang and
+               Yu Qiao and
+               Dahua Lin and
+               Xiaoou Tang and
+               Luc {Van Gool}},
+  title     = {Temporal Segment Networks: Towards Good Practices for Deep Action Recognition},
+  booktitle = {ECCV},
+  year      = {2016},
+}
+```
+
+To extract the warp flow, use the command
+```
+./extract_warp_gpu -f=test.avi -x=tmp/flow_x -y=tmp/flow_y -b=20 -t=1 -d=0 -s=1 -o=dir
+```
diff --git a/build_of.py b/build_of.py
new file mode 100644
index 0000000..a19af0e
--- /dev/null
+++ b/build_of.py
@@ -0,0 +1,98 @@
+__author__ = 'yjxiong'
+
+import cv2
+import os
+import glob
+from multiprocessing import Pool, current_process
+
+import argparse
+out_path = ''
+
+
+def dump_frames(vid_path):
+    video = cv2.VideoCapture(vid_path)
+    vid_name = vid_path.split('/')[-1].split('.')[0]
+    out_full_path = os.path.join(out_path, vid_name)
+
+    fcount = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT))
+    try:
+        os.mkdir(out_full_path)
+    except OSError:
+        pass
+    file_list = []
+    for i in xrange(fcount):
+        ret, frame = video.read()
+        assert ret
+        cv2.imwrite('{}/{:06d}.jpg'.format(out_full_path, i), frame)
+        access_path = '{}/{:06d}.jpg'.format(vid_name, i)
+        file_list.append(access_path)
+    print '{} done'.format(vid_name)
+    return file_list
+
+
+def run_optical_flow(vid_item, dev_id=0):
+    vid_path = vid_item[0]
+    vid_id = vid_item[1]
+    vid_name = vid_path.split('/')[-1].split('.')[0]
+    out_full_path = os.path.join(out_path, vid_name)
+    try:
+        os.mkdir(out_full_path)
+    except OSError:
+        pass
+
+    current = current_process()
+    dev_id = int(current._identity[0]) - 1
+    image_path = '{}/img'.format(out_full_path)
+    flow_x_path = '{}/flow_x'.format(out_full_path)
+    flow_y_path = '{}/flow_y'.format(out_full_path)
+
+    cmd = './build/extract_gpu -f={} -x={} -y={} -i={} -b=20 -t=1 -d={} -s=1 -o=zip'.format(vid_path, flow_x_path, flow_y_path, image_path, dev_id)
+
+    os.system(cmd)
+    print '{} {} done'.format(vid_id, vid_name)
+    return True
+
+def run_warp_optical_flow(vid_item, dev_id=0):
+    vid_path = vid_item[0]
+    vid_id = vid_item[1]
+    vid_name = vid_path.split('/')[-1].split('.')[0]
+    out_full_path = os.path.join(out_path, vid_name)
+    try:
+        os.mkdir(out_full_path)
+    except OSError:
+        pass
+
+    current = current_process()
+    dev_id = int(current._identity[0]) - 1
+    flow_x_path = '{}/flow_x'.format(out_full_path)
+    flow_y_path = '{}/flow_y'.format(out_full_path)
+
+    cmd = './build/extract_warp_gpu -f={} -x={} -y={} -b=20 -t=1 -d={} -s=1 -o=zip'.format(vid_path, flow_x_path, flow_y_path, dev_id)
+
+    os.system(cmd)
+    print 'warp on {} {} done'.format(vid_id, vid_name)
+    return True
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="extract optical flows")
+    parser.add_argument("src_dir")
+    parser.add_argument("out_dir")
+    parser.add_argument("--num_worker", type=int, default=8)
+    parser.add_argument("--flow_type", type=str, default='tvl1', choices=['tvl1', 'warp_tvl1'])
+
+    args = parser.parse_args()
+
+    out_path = args.out_dir
+    src_path = args.src_dir
+    num_worker = args.num_worker
+    flow_type = args.flow_type
+
+
+    vid_list = glob.glob(src_path+'/*.mp4')
+    print len(vid_list)
+    pool = Pool(num_worker)
+    if flow_type == 'tvl1':
+        pool.map(run_optical_flow, zip(vid_list, xrange(len(vid_list))))
+    elif flow_type == 'warp_tvl1':
+        pool.map(run_warp_optical_flow, zip(vid_list, xrange(len(vid_list))))
diff --git a/cmake/Modules/FindLibZip.cmake
b/cmake/Modules/FindLibZip.cmake new file mode 100644 index 0000000..76ab224 --- /dev/null +++ b/cmake/Modules/FindLibZip.cmake @@ -0,0 +1,37 @@ +# Finds libzip. +# +# This module defines: +# LIBZIP_INCLUDE_DIR_ZIP +# LIBZIP_INCLUDE_DIR_ZIPCONF +# LIBZIP_LIBRARY +# + +find_package(PkgConfig) +pkg_check_modules(PC_LIBZIP QUIET libzip) + +find_path(LIBZIP_INCLUDE_DIR_ZIP + NAMES zip.h + HINTS ${PC_LIBZIP_INCLUDE_DIRS}) + +find_path(LIBZIP_INCLUDE_DIR_ZIPCONF + NAMES zipconf.h + HINTS ${PC_LIBZIP_INCLUDE_DIRS}) + +find_library(LIBZIP_LIBRARY + NAMES zip) + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS( + LIBZIP DEFAULT_MSG + LIBZIP_LIBRARY LIBZIP_INCLUDE_DIR_ZIP LIBZIP_INCLUDE_DIR_ZIPCONF) + +set(LIBZIP_VERSION 0) + +if (LIBZIP_INCLUDE_DIR_ZIPCONF) + FILE(READ "${LIBZIP_INCLUDE_DIR_ZIPCONF}/zipconf.h" _LIBZIP_VERSION_CONTENTS) + if (_LIBZIP_VERSION_CONTENTS) + STRING(REGEX REPLACE ".*#define LIBZIP_VERSION \"([0-9.]+)\".*" "\\1" LIBZIP_VERSION "${_LIBZIP_VERSION_CONTENTS}") + endif () +endif () + +set(LIBZIP_VERSION ${LIBZIP_VERSION} CACHE STRING "Version number of libzip") diff --git a/denseFlow.cpp b/denseFlow.cpp deleted file mode 100644 index e15a46e..0000000 --- a/denseFlow.cpp +++ /dev/null @@ -1,107 +0,0 @@ -#include "opencv2/video/tracking.hpp" -#include "opencv2/imgproc/imgproc.hpp" -#include "opencv2/highgui/highgui.hpp" - -#include -#include -using namespace cv; - -static void convertFlowToImage(const Mat &flow_x, const Mat &flow_y, Mat &img_x, Mat &img_y, double lowerBound, double higherBound) { - #define CAST(v, L, H) ((v) > (H) ? 255 : (v) < (L) ? 0 : cvRound(255*((v) - (L))/((H)-(L)))) - for (int i = 0; i < flow_x.rows; ++i) { - for (int j = 0; j < flow_y.cols; ++j) { - float x = flow_x.at(i,j); - float y = flow_y.at(i,j); - img_x.at(i,j) = CAST(x, lowerBound, higherBound); - img_y.at(i,j) = CAST(y, lowerBound, higherBound); - } - } - #undef CAST -} - -static void drawOptFlowMap(const Mat& flow, Mat& cflowmap, int step,double, const Scalar& color) -{ - for(int y = 0; y < cflowmap.rows; y += step) - for(int x = 0; x < cflowmap.cols; x += step) - { - const Point2f& fxy = flow.at(y, x); - line(cflowmap, Point(x,y), Point(cvRound(x+fxy.x), cvRound(y+fxy.y)), - color); - circle(cflowmap, Point(x,y), 2, color, -1); - } -} - -int main(int argc, char** argv) -{ - // IO operation - - const char* keys = - { - "{ f | vidFile | ex2.avi | filename of video }" - "{ x | xFlowFile | flow_x | filename of flow x component }" - "{ y | yFlowFile | flow_y | filename of flow x component }" - "{ i | imgFile | flow_i | filename of flow image}" - "{ b | bound | 15 | specify the maximum of optical flow}" - }; - - CommandLineParser cmd(argc, argv, keys); - string vidFile = cmd.get("vidFile"); - string xFlowFile = cmd.get("xFlowFile"); - string yFlowFile = cmd.get("yFlowFile"); - string imgFile = cmd.get("imgFile"); - int bound = cmd.get("bound"); - - - VideoCapture capture(vidFile); - if(!capture.isOpened()) { - printf("Could not initialize capturing..\n"); - return -1; - } - - int frame_num = 0; - Mat image, prev_image, prev_grey, grey, frame, flow, cflow; - - while(true) { - capture >> frame; - if(frame.empty()) - break; - - if(frame_num == 0) { - image.create(frame.size(), CV_8UC3); - grey.create(frame.size(), CV_8UC1); - prev_image.create(frame.size(), CV_8UC3); - prev_grey.create(frame.size(), CV_8UC1); - - frame.copyTo(prev_image); - cvtColor(prev_image, prev_grey, CV_BGR2GRAY); - - frame_num++; - continue; - } - - frame.copyTo(image); - cvtColor(image, grey, 
CV_BGR2GRAY); - - // calcOpticalFlowFarneback(prev_grey,grey,flow,0.5, 3, 15, 3, 5, 1.2, 0 ); - calcOpticalFlowFarneback(prev_grey, grey, flow, 0.702, 5, 10, 2, 7, 1.5, cv::OPTFLOW_FARNEBACK_GAUSSIAN ); - - // prev_image.copyTo(cflow); - // drawOptFlowMap(flow, cflow, 12, 1.5, Scalar(0, 255, 0)); - - Mat flows[2]; - split(flow,flows); - Mat imgX(flows[0].size(),CV_8UC1); - Mat imgY(flows[0].size(),CV_8UC1); - convertFlowToImage(flows[0],flows[1], imgX, imgY, -bound, bound); - char tmp[20]; - sprintf(tmp,"_%04d.jpg",int(frame_num)); - imwrite(xFlowFile + tmp,imgX); - imwrite(yFlowFile + tmp,imgY); - imwrite(imgFile + tmp, image); - - std::swap(prev_grey, grey); - std::swap(prev_image, image); - frame_num = frame_num + 1; - } - return 0; -} diff --git a/denseFlow_gpu.cpp b/denseFlow_gpu.cpp deleted file mode 100644 index 3e2983f..0000000 --- a/denseFlow_gpu.cpp +++ /dev/null @@ -1,152 +0,0 @@ -#include "opencv2/video/tracking.hpp" -#include "opencv2/imgproc/imgproc.hpp" -#include "opencv2/highgui/highgui.hpp" -#include "opencv2/gpu/gpu.hpp" - -#include -#include -using namespace cv; -using namespace cv::gpu; - -static void convertFlowToImage(const Mat &flow_x, const Mat &flow_y, Mat &img_x, Mat &img_y, - double lowerBound, double higherBound) { - #define CAST(v, L, H) ((v) > (H) ? 255 : (v) < (L) ? 0 : cvRound(255*((v) - (L))/((H)-(L)))) - for (int i = 0; i < flow_x.rows; ++i) { - for (int j = 0; j < flow_y.cols; ++j) { - float x = flow_x.at(i,j); - float y = flow_y.at(i,j); - img_x.at(i,j) = CAST(x, lowerBound, higherBound); - img_y.at(i,j) = CAST(y, lowerBound, higherBound); - } - } - #undef CAST -} - -static void drawOptFlowMap(const Mat& flow, Mat& cflowmap, int step,double, const Scalar& color){ - for(int y = 0; y < cflowmap.rows; y += step) - for(int x = 0; x < cflowmap.cols; x += step) - { - const Point2f& fxy = flow.at(y, x); - line(cflowmap, Point(x,y), Point(cvRound(x+fxy.x), cvRound(y+fxy.y)), - color); - circle(cflowmap, Point(x,y), 2, color, -1); - } -} - -int main(int argc, char** argv){ - // IO operation - const char* keys = - { - "{ f | vidFile | ex2.avi | filename of video }" - "{ x | xFlowFile | flow_x | filename of flow x component }" - "{ y | yFlowFile | flow_y | filename of flow x component }" - "{ i | imgFile | flow_i | filename of flow image}" - "{ b | bound | 15 | specify the maximum of optical flow}" - "{ t | type | 0 | specify the optical flow algorithm }" - "{ d | device_id | 0 | set gpu id}" - "{ s | step | 1 | specify the step for frame sampling}" - }; - - CommandLineParser cmd(argc, argv, keys); - string vidFile = cmd.get("vidFile"); - string xFlowFile = cmd.get("xFlowFile"); - string yFlowFile = cmd.get("yFlowFile"); - string imgFile = cmd.get("imgFile"); - int bound = cmd.get("bound"); - int type = cmd.get("type"); - int device_id = cmd.get("device_id"); - int step = cmd.get("step"); - - VideoCapture capture(vidFile); - if(!capture.isOpened()) { - printf("Could not initialize capturing..\n"); - return -1; - } - - int frame_num = 0; - Mat image, prev_image, prev_grey, grey, frame, flow_x, flow_y; - GpuMat frame_0, frame_1, flow_u, flow_v; - - setDevice(device_id); - FarnebackOpticalFlow alg_farn; - OpticalFlowDual_TVL1_GPU alg_tvl1; - BroxOpticalFlow alg_brox(0.197f, 50.0f, 0.8f, 10, 77, 10); - - while(true) { - capture >> frame; - if(frame.empty()) - break; - if(frame_num == 0) { - image.create(frame.size(), CV_8UC3); - grey.create(frame.size(), CV_8UC1); - prev_image.create(frame.size(), CV_8UC3); - prev_grey.create(frame.size(), CV_8UC1); - - 
frame.copyTo(prev_image); - cvtColor(prev_image, prev_grey, CV_BGR2GRAY); - - frame_num++; - - int step_t = step; - while (step_t > 1){ - capture >> frame; - step_t--; - } - continue; - } - - frame.copyTo(image); - cvtColor(image, grey, CV_BGR2GRAY); - - - frame_0.upload(prev_grey); - frame_1.upload(grey); - - - // GPU optical flow - switch(type){ - case 0: - alg_farn(frame_0,frame_1,flow_u,flow_v); - break; - case 1: - alg_tvl1(frame_0,frame_1,flow_u,flow_v); - break; - case 2: - GpuMat d_frame0f, d_frame1f; - frame_0.convertTo(d_frame0f, CV_32F, 1.0 / 255.0); - frame_1.convertTo(d_frame1f, CV_32F, 1.0 / 255.0); - alg_brox(d_frame0f, d_frame1f, flow_u,flow_v); - break; - } - - flow_u.download(flow_x); - flow_v.download(flow_y); - - // Output optical flow - Mat imgX(flow_x.size(),CV_8UC1); - Mat imgY(flow_y.size(),CV_8UC1); - convertFlowToImage(flow_x,flow_y, imgX, imgY, -bound, bound); - char tmp[20]; - sprintf(tmp,"_%04d.jpg",int(frame_num)); - - Mat imgX_, imgY_, image_; - resize(imgX,imgX_,cv::Size(340,256)); - resize(imgY,imgY_,cv::Size(340,256)); - resize(image,image_,cv::Size(340,256)); - - imwrite(xFlowFile + tmp,imgX_); - imwrite(yFlowFile + tmp,imgY_); - imwrite(imgFile + tmp, image_); - - std::swap(prev_grey, grey); - std::swap(prev_image, image); - frame_num = frame_num + 1; - - int step_t = step; - while (step_t > 1){ - capture >> frame; - step_t--; - } - } - return 0; -} diff --git a/include/CLUE b/include/CLUE new file mode 160000 index 0000000..1d81996 --- /dev/null +++ b/include/CLUE @@ -0,0 +1 @@ +Subproject commit 1d819960b815dd69d4adf606e09bc8bc69269c30 diff --git a/include/common.h b/include/common.h new file mode 100644 index 0000000..6515c88 --- /dev/null +++ b/include/common.h @@ -0,0 +1,40 @@ +// +// Created by yjxiong on 11/18/15. +// + +#ifndef DENSEFLOW_COMMON_H_H +#define DENSEFLOW_COMMON_H_H + + + +#include "opencv2/video/tracking.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/highgui/highgui.hpp" + +#include +#include +using namespace cv; +using std::string; +using std::vector; + +void convertFlowToImage(const Mat &flow_x, const Mat &flow_y, Mat &img_x, Mat &img_y, + double lowerBound, double higherBound); +void drawOptFlowMap(const Mat& flow, Mat& cflowmap, int step,double, const Scalar& color); + +void encodeFlowMap(const Mat& flow_map_x, const Mat& flow_map_y, + std::vector& encoded_x, std::vector& encoded_y, + int bound, bool to_jpg=true); + +inline void initializeMats(const Mat& frame, + Mat& capture_image, Mat& capture_gray, + Mat& prev_image, Mat& prev_gray){ + capture_image.create(frame.size(), CV_8UC3); + capture_gray.create(frame.size(), CV_8UC1); + + prev_image.create(frame.size(), CV_8UC3); + prev_gray.create(frame.size(), CV_8UC1); +} + +void writeImages(std::vector> images, std::string name_temp); + +#endif //DENSEFLOW_COMMON_H_H diff --git a/include/dense_flow.h b/include/dense_flow.h new file mode 100644 index 0000000..44369a5 --- /dev/null +++ b/include/dense_flow.h @@ -0,0 +1,32 @@ +// +// Created by yjxiong on 11/18/15. 
+// + +#ifndef DENSEFLOW_DENSE_FLOW_H +#define DENSEFLOW_DENSE_FLOW_H + +#include "common.h" +#include "easylogging++.h" + +void calcDenseFlow(string file_name, int bound, int type, int step, + vector >& output_x, + vector >& output_y, + vector >& output_img); +void calcDenseFlowGPU(string file_name, int bound, int type, int step, int dev_id, + vector >& output_x, + vector >& output_y, + vector >& output_img, + int new_width=0, int new_height=0); + +void calcDenseFlowPureGPU(std::string file_name, int bound, int type, int step, int dev_id, + std::vector >& output_x, + std::vector >& output_y, + std::vector >& output_img); + +void calcDenseWarpFlowGPU(std::string file_name, int bound, int type, int step, int dev_id, + std::vector >& output_x, + std::vector >& output_y); + +void MatchFromFlow_copy(const Mat& prev_grey, const Mat& flow_x, const Mat& flow_y, std::vector& prev_pts, std::vector& pts, const Mat& mask); + +#endif //DENSEFLOW_DENSE_FLOW_H diff --git a/include/easylogging++ b/include/easylogging++ new file mode 160000 index 0000000..f926802 --- /dev/null +++ b/include/easylogging++ @@ -0,0 +1 @@ +Subproject commit f926802dfbde716d82b64b8ef3c25b7f0fcfec65 diff --git a/include/utils.h b/include/utils.h new file mode 100644 index 0000000..63eb22e --- /dev/null +++ b/include/utils.h @@ -0,0 +1,12 @@ +// +// Created by Yuanjun Xiong on 18/11/2015. +// + +#ifndef DENSEFLOW_UTILS_H +#define DENSEFLOW_UTILS_H + +#include "common.h" + +void writeZipFile(std::vector >& data, std::string name_temp, std::string archive_name); + +#endif //DENSEFLOW_UTILS_H diff --git a/include/warp_flow.h b/include/warp_flow.h new file mode 100644 index 0000000..a442c68 --- /dev/null +++ b/include/warp_flow.h @@ -0,0 +1,208 @@ +// +// Created by alex on 16-5-25. +// + +#ifndef DENSEFLOW_WARP_FLOW_H_H +#define DENSEFLOW_WARP_FLOW_H_H + +cv::Mat windowedMatchingMask( const std::vector& keypoints1, const std::vector& keypoints2, + float maxDeltaX, float maxDeltaY ) +{ + if( keypoints1.empty() || keypoints2.empty() ) + return cv::Mat(); + + int n1 = (int)keypoints1.size(), n2 = (int)keypoints2.size(); + cv::Mat mask( n1, n2, CV_8UC1 ); + for( int i = 0; i < n1; i++ ) + { + for( int j = 0; j < n2; j++ ) + { + cv::Point2f diff = keypoints2[j].pt - keypoints1[i].pt; + mask.at(i, j) = std::abs(diff.x) < maxDeltaX && std::abs(diff.y) < maxDeltaY; + } + } + return mask; +} + +void MyWarpPerspective(Mat& prev_src, Mat& src, Mat& dst, Mat& M0, int flags=INTER_LINEAR, + int borderType=BORDER_CONSTANT, const Scalar& borderValue=Scalar()) +{ + int width = src.cols; + int height = src.rows; + dst.create( height, width, CV_8UC1 ); + + Mat mask = Mat::zeros(height, width, CV_8UC1); + const int margin = 5; + + const int BLOCK_SZ = 32; + short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ]; + + int interpolation = flags & INTER_MAX; + if( interpolation == INTER_AREA ) + interpolation = INTER_LINEAR; + + double M[9]; + Mat matM(3, 3, CV_64F, M); + M0.convertTo(matM, matM.type()); + if( !(flags & WARP_INVERSE_MAP) ) + invert(matM, matM); + + int x, y, x1, y1; + + int bh0 = min(BLOCK_SZ/2, height); + int bw0 = min(BLOCK_SZ*BLOCK_SZ/bh0, width); + bh0 = min(BLOCK_SZ*BLOCK_SZ/bw0, height); + + for( y = 0; y < height; y += bh0 ) { + for( x = 0; x < width; x += bw0 ) { + int bw = min( bw0, width - x); + int bh = min( bh0, height - y); + + Mat _XY(bh, bw, CV_16SC2, XY); + Mat matA; + Mat dpart(dst, Rect(x, y, bw, bh)); + + for( y1 = 0; y1 < bh; y1++ ) { + + short* xy = XY + y1*bw*2; + double X0 = M[0]*x + M[1]*(y + y1) + M[2]; + double Y0 = 
M[3]*x + M[4]*(y + y1) + M[5]; + double W0 = M[6]*x + M[7]*(y + y1) + M[8]; + short* alpha = A + y1*bw; + + for( x1 = 0; x1 < bw; x1++ ) { + + double W = W0 + M[6]*x1; + W = W ? INTER_TAB_SIZE/W : 0; + double fX = max((double)INT_MIN, min((double)INT_MAX, (X0 + M[0]*x1)*W)); + double fY = max((double)INT_MIN, min((double)INT_MAX, (Y0 + M[3]*x1)*W)); + + double _X = fX/double(INTER_TAB_SIZE); + double _Y = fY/double(INTER_TAB_SIZE); + + if( _X > margin && _X < width-1-margin && _Y > margin && _Y < height-1-margin ) + mask.at(y+y1, x+x1) = 1; + + int X = saturate_cast(fX); + int Y = saturate_cast(fY); + + xy[x1*2] = saturate_cast(X >> INTER_BITS); + xy[x1*2+1] = saturate_cast(Y >> INTER_BITS); + alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (X & (INTER_TAB_SIZE-1))); + } + } + + Mat _matA(bh, bw, CV_16U, A); + remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue ); + } + } + + for( y = 0; y < height; y++ ) { + const uchar* m = mask.ptr(y); + const uchar* s = prev_src.ptr(y); + uchar* d = dst.ptr(y); + for( x = 0; x < width; x++ ) { + if(m[x] == 0) + d[x] = s[x]; + } + } +} + +void ComputeMatch(const std::vector& prev_kpts, const std::vector& kpts, + const Mat& prev_desc, const Mat& desc, std::vector& prev_pts, std::vector& pts) +{ + prev_pts.clear(); + pts.clear(); + + if(prev_kpts.size() == 0 || kpts.size() == 0) + return; + + Mat mask = windowedMatchingMask(kpts, prev_kpts, 25, 25); + + BFMatcher desc_matcher(NORM_L2); + std::vector matches; + + desc_matcher.match(desc, prev_desc, matches, mask); + + prev_pts.reserve(matches.size()); + pts.reserve(matches.size()); + + for(size_t i = 0; i < matches.size(); i++) { + const DMatch& dmatch = matches[i]; + // get the point pairs that are successfully matched + prev_pts.push_back(prev_kpts[dmatch.trainIdx].pt); + pts.push_back(kpts[dmatch.queryIdx].pt); + } + + return; +} + +void MergeMatch(const std::vector& prev_pts1, const std::vector& pts1, + const std::vector& prev_pts2, const std::vector& pts2, + std::vector& prev_pts_all, std::vector& pts_all) +{ + prev_pts_all.clear(); + prev_pts_all.reserve(prev_pts1.size() + prev_pts2.size()); + + pts_all.clear(); + pts_all.reserve(pts1.size() + pts2.size()); + + for(size_t i = 0; i < prev_pts1.size(); i++) { + prev_pts_all.push_back(prev_pts1[i]); + pts_all.push_back(pts1[i]); + } + + for(size_t i = 0; i < prev_pts2.size(); i++) { + prev_pts_all.push_back(prev_pts2[i]); + pts_all.push_back(pts2[i]); + } + + return; +} + +void MatchFromFlow(const Mat& prev_grey, const Mat& flow, std::vector& prev_pts, std::vector& pts, const Mat& mask) +{ + int width = prev_grey.cols; + int height = prev_grey.rows; + prev_pts.clear(); + pts.clear(); + + const int MAX_COUNT = 1000; + goodFeaturesToTrack(prev_grey, prev_pts, MAX_COUNT, 0.001, 3, mask); + + if(prev_pts.size() == 0) + return; + + for(int i = 0; i < prev_pts.size(); i++) { + int x = std::min(std::max(cvRound(prev_pts[i].x), 0), width-1); + int y = std::min(std::max(cvRound(prev_pts[i].y), 0), height-1); + + const float* f = flow.ptr(y); + pts.push_back(Point2f(x+f[2*x], y+f[2*x+1])); + } +} + +void MatchFromFlow_copy(const Mat& prev_grey, const Mat& flow_x, const Mat& flow_y, std::vector& prev_pts, std::vector& pts, const Mat& mask) +{ + int width = prev_grey.cols; + int height = prev_grey.rows; + prev_pts.clear(); + pts.clear(); + + const int MAX_COUNT = 1000; + goodFeaturesToTrack(prev_grey, prev_pts, MAX_COUNT, 0.001, 3, mask); + + if(prev_pts.size() == 0) + return; + + for(int i = 0; i < prev_pts.size(); i++) { + int x = 
std::min(std::max(cvRound(prev_pts[i].x), 0), width-1); + int y = std::min(std::max(cvRound(prev_pts[i].y), 0), height-1); + + const float* f_x = flow_x.ptr(y); + const float* f_y = flow_y.ptr(y); + pts.push_back(Point2f(x+f_x[x], y+f_y[y])); + } +} + +#endif //DENSEFLOW_WARP_FLOW_H_H diff --git a/extracOpticalFlow.m b/matlab/extracOpticalFlow.m similarity index 88% rename from extracOpticalFlow.m rename to matlab/extracOpticalFlow.m index 69f2675..ada79ce 100644 --- a/extracOpticalFlow.m +++ b/matlab/extracOpticalFlow.m @@ -16,14 +16,14 @@ for j = 1:length(filelist) if ~exist([path2,foldername{i},'/',filelist(j).name(1:end-4)],'dir') mkdir([path2,foldername{i},'/',filelist(j).name(1:end-4)]); - end + end file1 = [path1,foldername{i},'/',filelist(j).name]; file2 = [path2,foldername{i},'/',filelist(j).name(1:end-4),'/','flow_x']; file3 = [path2,foldername{i},'/',filelist(j).name(1:end-4),'/','flow_y']; file4 = [path2,foldername{i},'/',filelist(j).name(1:end-4),'/','flow_i']; - cmd = sprintf('./denseFlow -f %s -x %s -y %s -i %s -b 20',file1,file2,file3,file4); + cmd = sprintf('./extract_gpu -f %s -x %s -y %s -i %s -b 20',file1,file2,file3,file4); system(cmd); end i end -end \ No newline at end of file +end diff --git a/extractOpticalFlow.m b/matlab/extractOpticalFlow.m similarity index 89% rename from extractOpticalFlow.m rename to matlab/extractOpticalFlow.m index eb51bf7..9fa2c1a 100644 --- a/extractOpticalFlow.m +++ b/matlab/extractOpticalFlow.m @@ -16,14 +16,14 @@ for j = 1:length(filelist) if ~exist([path2,foldername{i},'/',filelist(j).name(1:end-4)],'dir') mkdir([path2,foldername{i},'/',filelist(j).name(1:end-4)]); - end + end file1 = [path1,foldername{i},'/',filelist(j).name]; file2 = [path2,foldername{i},'/',filelist(j).name(1:end-4),'/','flow_x']; file3 = [path2,foldername{i},'/',filelist(j).name(1:end-4),'/','flow_y']; file4 = [path2,foldername{i},'/',filelist(j).name(1:end-4),'/','flow_i']; - cmd = sprintf('./denseFlow -f %s -x %s -y %s -i %s -b 20',file1,file2,file3,file4); + cmd = sprintf('./extract_gpu -f %s -x %s -y %s -i %s -b 20',file1,file2,file3,file4); system(cmd); end i end -end \ No newline at end of file +end diff --git a/extractOpticalFlow_gpu.m b/matlab/extractOpticalFlow_gpu.m similarity index 93% rename from extractOpticalFlow_gpu.m rename to matlab/extractOpticalFlow_gpu.m index e7602d9..91e0d6b 100644 --- a/extractOpticalFlow_gpu.m +++ b/matlab/extractOpticalFlow_gpu.m @@ -31,15 +31,15 @@ for j = 1:length(filelist) if ~exist([path2,foldername{i},'/',filelist(j).name(1:end-4)],'dir') mkdir([path2,foldername{i},'/',filelist(j).name(1:end-4)]); - end + end file1 = [path1,foldername{i},'/',filelist(j).name]; file2 = [path2,foldername{i},'/',filelist(j).name(1:end-4),'/','flow_x']; file3 = [path2,foldername{i},'/',filelist(j).name(1:end-4),'/','flow_y']; file4 = [path2,foldername{i},'/',filelist(j).name(1:end-4),'/','flow_i']; - cmd = sprintf('./denseFlow_gpu -f %s -x %s -y %s -i %s -b 20 -t %d -d %d -s %d',... + cmd = sprintf('./extract_gpu -f %s -x %s -y %s -i %s -b 20 -t %d -d %d -s %d',... file1,file2,file3,file4,type,device_id,1); system(cmd); end i end -end \ No newline at end of file +end diff --git a/src/common.cpp b/src/common.cpp new file mode 100644 index 0000000..b39c2f3 --- /dev/null +++ b/src/common.cpp @@ -0,0 +1,61 @@ +// +// Created by yjxiong on 11/18/15. +// + +#include "common.h" + +void convertFlowToImage(const Mat &flow_x, const Mat &flow_y, Mat &img_x, Mat &img_y, + double lowerBound, double higherBound) { +#define CAST(v, L, H) ((v) > (H) ? 
255 : (v) < (L) ? 0 : cvRound(255*((v) - (L))/((H)-(L)))) + for (int i = 0; i < flow_x.rows; ++i) { + for (int j = 0; j < flow_y.cols; ++j) { + float x = flow_x.at(i,j); + float y = flow_y.at(i,j); + img_x.at(i,j) = CAST(x, lowerBound, higherBound); + img_y.at(i,j) = CAST(y, lowerBound, higherBound); + } + } +#undef CAST +} + +void drawOptFlowMap(const Mat& flow, Mat& cflowmap, int step,double, const Scalar& color){ + for(int y = 0; y < cflowmap.rows; y += step) + for(int x = 0; x < cflowmap.cols; x += step) + { + const Point2f& fxy = flow.at(y, x); + line(cflowmap, Point(x,y), Point(cvRound(x+fxy.x), cvRound(y+fxy.y)), + color); + circle(cflowmap, Point(x,y), 2, color, -1); + } +} + +void encodeFlowMap(const Mat& flow_map_x, const Mat& flow_map_y, + std::vector& encoded_x, std::vector& encoded_y, + int bound, bool to_jpg){ + Mat flow_img_x(flow_map_x.size(), CV_8UC1); + Mat flow_img_y(flow_map_y.size(), CV_8UC1); + + convertFlowToImage(flow_map_x, flow_map_y, flow_img_x, flow_img_y, + -bound, bound); + + if (to_jpg) { + imencode(".jpg", flow_img_x, encoded_x); + imencode(".jpg", flow_img_y, encoded_y); + }else { + encoded_x.resize(flow_img_x.total()); + encoded_y.resize(flow_img_y.total()); + memcpy(encoded_x.data(), flow_img_x.data, flow_img_x.total()); + memcpy(encoded_y.data(), flow_img_y.data, flow_img_y.total()); + } +} + +void writeImages(std::vector> images, std::string name_temp){ + for (int i = 0; i < images.size(); ++i){ + char tmp[256]; + sprintf(tmp, "_%05d.jpg", i+1); + FILE* fp; + fp = fopen((name_temp + tmp).c_str(), "wb"); + fwrite( images[i].data(), 1, images[i].size(), fp); + fclose(fp); + } +} diff --git a/src/dense_flow.cpp b/src/dense_flow.cpp new file mode 100644 index 0000000..15c0842 --- /dev/null +++ b/src/dense_flow.cpp @@ -0,0 +1,73 @@ +// +// Created by yjxiong on 11/18/15. +// +#include "common.h" +#include "dense_flow.h" +#include "opencv2/optflow.hpp" + +void calcDenseFlow(std::string file_name, int bound, int type, int step, + std::vector >& output_x, + std::vector >& output_y, + std::vector >& output_img){ + + VideoCapture video_stream(file_name); + CHECK(video_stream.isOpened())<<"Cannot open video stream \"" + < alg_tvl1 = cv::optflow::DualTVL1OpticalFlow::create(); + + bool initialized = false; + for(int iter = 0;; iter++){ + video_stream >> capture_frame; + if (capture_frame.empty()) break; // read frames until end + + //build mats for the first frame + if (!initialized){ + initializeMats(capture_frame, capture_image, capture_gray, + prev_image, prev_gray); + capture_frame.copyTo(prev_image); + cvtColor(prev_image, prev_gray, cv::COLOR_BGR2GRAY); + initialized = true; +// LOG(INFO)<<"Initialized"; + }else if(iter % step == 0){ + capture_frame.copyTo(capture_image); + cvtColor(capture_image, capture_gray, cv::COLOR_BGR2GRAY); + + switch(type){ + case 0: { + calcOpticalFlowFarneback(prev_gray, capture_gray, flow, + 0.702, 5, 10, 2, 7, 1.5, + cv::OPTFLOW_FARNEBACK_GAUSSIAN ); + break; + } + case 1: { + alg_tvl1->calc(prev_gray, capture_gray, flow); + break; + } + default: + LOG(WARNING)<<"Unknown optical method. 
Using Farneback"; + calcOpticalFlowFarneback(prev_gray, capture_gray, flow, + 0.702, 5, 10, 2, 7, 1.5, + cv::OPTFLOW_FARNEBACK_GAUSSIAN ); + } + + std::vector str_x, str_y, str_img; + split(flow, flow_split); + encodeFlowMap(flow_split[0], flow_split[1], str_x, str_y, bound); + imencode(".jpg", capture_image, str_img); + + output_x.push_back(str_x); + output_y.push_back(str_y); + output_img.push_back(str_img); +// LOG(INFO)< >& output_x, + vector >& output_y, + vector >& output_img, + int new_width, int new_height){ + VideoCapture video_stream(file_name); + CHECK(video_stream.isOpened())<<"Cannot open video stream \"" + < alg_farn = cuda::FarnebackOpticalFlow::create(); + cv::Ptr alg_tvl1 = cuda::OpticalFlowDual_TVL1::create(); + cv::Ptr alg_brox = cuda::BroxOpticalFlow::create(0.197f, 50.0f, 0.8f, 10, 77, 10); + + bool do_resize = (new_height > 0) && (new_width > 0); + + bool initialized = false; + int cnt = 0; + while(true){ + + //build mats for the first frame + if (!initialized){ + video_stream >> capture_frame; + if (capture_frame.empty()) return; // read frames until end + + if (!do_resize){ + initializeMats(capture_frame, capture_image, capture_gray, + prev_image, prev_gray); + capture_frame.copyTo(prev_image); + }else{ + capture_image.create(new_size, CV_8UC3); + capture_gray.create(new_size, CV_8UC1); + prev_image.create(new_size, CV_8UC3); + prev_gray.create(new_size, CV_8UC1); + cv::resize(capture_frame, prev_image, new_size); + } + cvtColor(prev_image, prev_gray, COLOR_BGR2GRAY); + initialized = true; + for(int s = 0; s < step; ++s){ + video_stream >> capture_frame; + cnt ++; + if (capture_frame.empty()) return; // read frames until end + } + }else { + if (!do_resize) + capture_frame.copyTo(capture_image); + else + cv::resize(capture_frame, capture_image, new_size); + + cvtColor(capture_image, capture_gray, COLOR_BGR2GRAY); + d_frame_0.upload(prev_gray); + d_frame_1.upload(capture_gray); + + switch(type){ + case 0: { + alg_farn->calc(d_frame_0, d_frame_1, d_flow); + break; + } + case 1: { + alg_tvl1->calc(d_frame_0, d_frame_1, d_flow); + break; + } + case 2: { + GpuMat d_buf_0, d_buf_1; + d_frame_0.convertTo(d_buf_0, CV_32F, 1.0 / 255.0); + d_frame_1.convertTo(d_buf_1, CV_32F, 1.0 / 255.0); + alg_brox->calc(d_buf_0, d_buf_1, d_flow); + break; + } + default: + LOG(ERROR)<<"Unknown optical method: "<> capture_frame; + cnt ++; + hasnext = !capture_frame.empty(); + // read frames until end + } + + GpuMat planes[2]; + cuda::split(d_flow, planes); + + //get back flow map + Mat flow_x(planes[0]); + Mat flow_y(planes[1]); + + std::vector str_x, str_y, str_img; + encodeFlowMap(flow_x, flow_y, str_x, str_y, bound); + imencode(".jpg", capture_image, str_img); + + output_x.push_back(str_x); + output_y.push_back(str_y); + output_img.push_back(str_img); + + std::swap(prev_gray, capture_gray); + std::swap(prev_image, capture_image); + + if (!hasnext){ + return; + } + } + + + } + +} + +/** + * This function use pure GPU backend for video loading and optical flow calculation + */ +void calcDenseFlowPureGPU(std::string file_name, int bound, int type, int step, int dev_id, + std::vector >& output_x, + std::vector >& output_y, + std::vector >& output_img){ + + setDevice(dev_id); + cv::Ptr video_stream = cudacodec::createVideoReader(file_name); +// VideoCapture video_stream(file_name); + //CHECK(video_stream->isOpened())<<"Cannot open video stream \"" + // < alg_farn = cuda::FarnebackOpticalFlow::create(); + cv::Ptr alg_tvl1 = cuda::OpticalFlowDual_TVL1::create(); + cv::Ptr alg_brox = 
cuda::BroxOpticalFlow::create(0.197f, 50.0f, 0.8f, 10, 77, 10); + + bool initialized = false; + while(true){ + + //build mats for the first frame + if (!initialized){ + bool success = video_stream->nextFrame(capture_frame); + if (!success) break; // read frames until end + capture_image.create(capture_frame.size(), CV_8UC3); + capture_gray.create(capture_frame.size(), CV_8UC1); + + prev_image.create(capture_frame.size(), CV_8UC3); + prev_gray.create(capture_frame.size(), CV_8UC1); + + capture_frame.copyTo(prev_image); + cvtColor(prev_image, prev_gray, COLOR_BGR2GRAY); + initialized = true; + + for (int s = 0; s < step; ++s){ + video_stream->nextFrame(capture_frame); + } + }else { + capture_frame.copyTo(capture_image); + cvtColor(capture_image, capture_gray, COLOR_BGR2GRAY); + + switch(type){ + case 0: { + alg_farn->calc(prev_gray, capture_gray, d_flow); + break; + } + case 1: { + alg_tvl1->calc(prev_gray, capture_gray, d_flow); + break; + } + case 2: { + GpuMat d_buf_0, d_buf_1; + prev_gray.convertTo(d_buf_0, CV_32F, 1.0 / 255.0); + capture_gray.convertTo(d_buf_1, CV_32F, 1.0 / 255.0); + alg_brox->calc(d_buf_0, d_buf_1, d_flow); + break; + } + default: + LOG(ERROR)<<"Unknown optical method: "<nextFrame(capture_frame)) break; + } + + GpuMat planes[2]; + cuda::split(d_flow, planes); + + //get back flow map + Mat flow_x(planes[0]); + Mat flow_y(planes[1]); + capture_image.download(img); + + std::vector str_x, str_y, str_img; + encodeFlowMap(flow_x, flow_y, str_x, str_y, bound); + imencode(".jpg", img, str_img); + + output_x.push_back(str_x); + output_y.push_back(str_y); + output_img.push_back(str_img); + + std::swap(prev_gray, capture_gray); + std::swap(prev_image, capture_image); + } + + + } + +} diff --git a/src/dense_warp_flow_gpu.cpp b/src/dense_warp_flow_gpu.cpp new file mode 100644 index 0000000..ed5b382 --- /dev/null +++ b/src/dense_warp_flow_gpu.cpp @@ -0,0 +1,188 @@ +#include "common.h" +#include "dense_flow.h" + +#include "opencv2/video/tracking.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/calib3d/calib3d.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/features2d/features2d.hpp" +#include "opencv2/core/core.hpp" +#include "opencv2/xfeatures2d.hpp" +#include "opencv2/cudaarithm.hpp" +#include "opencv2/cudaoptflow.hpp" +#include "opencv2/cudacodec.hpp" + +#include +#include + +#include "warp_flow.h" + +using namespace cv; +using namespace cv::cuda; +using namespace std; + +void calcDenseWarpFlowGPU(string file_name, int bound, int type, int step, int dev_id, + std::vector >& output_x, + std::vector >& output_y){ + VideoCapture video_stream(file_name); + CHECK(video_stream.isOpened())<<"Cannot open video stream \"" + < detector_surf = xfeatures2d::SurfFeatureDetector::create(200); + cv::Ptr extractor_surf = xfeatures2d::SurfDescriptorExtractor::create(true, true); + std::vector prev_pts_flow, pts_flow; + std::vector prev_pts_surf, pts_surf; + std::vector prev_pts_all, pts_all; + std::vector prev_kpts_surf, kpts_surf; + Mat prev_desc_surf, desc_surf; + + setDevice(dev_id); + Mat capture_frame, capture_image, prev_image, capture_gray, prev_gray, human_mask; + Mat flow_x, flow_y; + + GpuMat d_frame_0, d_frame_1; + GpuMat d_flow; + + cv::Ptr alg_farn = cuda::FarnebackOpticalFlow::create(); + cv::Ptr alg_tvl1 = cuda::OpticalFlowDual_TVL1::create(); + cv::Ptr alg_brox = cuda::BroxOpticalFlow::create(0.197f, 50.0f, 0.8f, 10, 77, 10); + + bool initialized = false; + int cnt = 0; + 
while(true){ + + //build mats for the first frame + if (!initialized){ + video_stream >> capture_frame; + if (capture_frame.empty()) return; // read frames until end + initializeMats(capture_frame, capture_image, capture_gray, + prev_image, prev_gray); + capture_frame.copyTo(prev_image); + cvtColor(prev_image, prev_gray, COLOR_BGR2GRAY); + + //detect key points + human_mask = Mat::ones(capture_frame.size(), CV_8UC1); + detector_surf->detect(prev_gray, prev_kpts_surf, human_mask); + extractor_surf->compute(prev_gray, prev_kpts_surf, prev_desc_surf); + // TODO! check detector_surf->detectAndCompute() + + initialized = true; + for(int s = 0; s < step; ++s){ + video_stream >> capture_frame; + cnt ++; + if (capture_frame.empty()) return; // read frames until end + } + }else { + capture_frame.copyTo(capture_image); + cvtColor(capture_image, capture_gray, COLOR_BGR2GRAY); + d_frame_0.upload(prev_gray); + d_frame_1.upload(capture_gray); + + switch(type){ + case 0: { + alg_farn->calc(d_frame_0, d_frame_1, d_flow); + break; + } + case 1: { + alg_tvl1->calc(d_frame_0, d_frame_1, d_flow); + break; + } + case 2: { + GpuMat d_buf_0, d_buf_1; + d_frame_0.convertTo(d_buf_0, CV_32F, 1.0 / 255.0); + d_frame_1.convertTo(d_buf_1, CV_32F, 1.0 / 255.0); + alg_brox->calc(d_buf_0, d_buf_1, d_flow); + break; + } + default: + LOG(ERROR)<<"Unknown optical method: "<detect(capture_gray, kpts_surf, human_mask); + extractor_surf->compute(capture_gray, kpts_surf, desc_surf); + ComputeMatch(prev_kpts_surf, kpts_surf, prev_desc_surf, desc_surf, prev_pts_surf, pts_surf); + MatchFromFlow_copy(capture_gray, flow_x, flow_y, prev_pts_flow, pts_flow, human_mask); + MergeMatch(prev_pts_flow, pts_flow, prev_pts_surf, pts_surf, prev_pts_all, pts_all); + Mat H = Mat::eye(3, 3, CV_64FC1); + if(pts_all.size() > 50) { + std::vector match_mask; + Mat temp = findHomography(prev_pts_all, pts_all, RANSAC, 1, match_mask); + if(cv::countNonZero(Mat(match_mask)) > 25) + H = temp; + } + + Mat H_inv = H.inv(); + Mat gray_warp = Mat::zeros(capture_gray.size(), CV_8UC1); + MyWarpPerspective(prev_gray, capture_gray, gray_warp, H_inv); + + // re-extract flow on warped images + d_frame_0.upload(prev_gray); + d_frame_1.upload(gray_warp); + + switch(type){ + case 0: { + alg_farn->calc(d_frame_0, d_frame_1, d_flow); + break; + } + case 1: { + alg_tvl1->calc(d_frame_0, d_frame_1, d_flow); + break; + } + case 2: { + GpuMat d_buf_0, d_buf_1; + d_frame_0.convertTo(d_buf_0, CV_32F, 1.0 / 255.0); + d_frame_1.convertTo(d_buf_1, CV_32F, 1.0 / 255.0); + alg_brox->calc(d_buf_0, d_buf_1, d_flow); + break; + } + default: + LOG(ERROR)<<"Unknown optical method: "< str_x, str_y; + encodeFlowMap(flow_x, flow_y, str_x, str_y, bound); + + output_x.push_back(str_x); + output_y.push_back(str_y); + + std::swap(prev_gray, capture_gray); + std::swap(prev_image, capture_image); + + + //get next frame + bool hasnext = true; + for(int s = 0; s < step; ++s){ + video_stream >> capture_frame; + cnt ++; + hasnext = !capture_frame.empty(); + // read frames until end + } + if (!hasnext){ + return; + } + } + + + } +} diff --git a/src/py_denseflow.cpp b/src/py_denseflow.cpp new file mode 100644 index 0000000..cfee077 --- /dev/null +++ b/src/py_denseflow.cpp @@ -0,0 +1,204 @@ +#include +#include +#include +#include + + +#include "common.h" +#include "opencv2/cudaarithm.hpp" +#include "opencv2/cudaoptflow.hpp" +#include "opencv2/cudacodec.hpp" + +#include "opencv2/video/tracking.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/highgui/highgui.hpp" +#include 
"opencv2/calib3d/calib3d.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/features2d/features2d.hpp" +#include "opencv2/core/core.hpp" +#include "opencv2/xfeatures2d.hpp" + +#include "warp_flow.h" + +using namespace cv::cuda; +using namespace cv; + +namespace bp = boost::python; + +class TVL1FlowExtractor{ +public: + + TVL1FlowExtractor(int bound){ + alg_tvl1 = cuda::OpticalFlowDual_TVL1::create(); + bound_ = bound; + } + + static void set_device(int dev_id){ + setDevice(dev_id); + } + + bp::list extract_flow(bp::list frames, int img_width, int img_height){ + bp::list output; + Mat input_frame, prev_frame, next_frame, prev_gray, next_gray; + Mat flow_x, flow_y; + + + + + // initialize the first frame + const char* first_data = ((const char*)bp::extract(frames[0])); + input_frame = Mat(img_height, img_width, CV_8UC3); + initializeMats(input_frame, prev_frame, prev_gray, next_frame, next_gray); + + memcpy(prev_frame.data, first_data, bp::len(frames[0])); + cvtColor(prev_frame, prev_gray, COLOR_BGR2GRAY); + for (int idx = 1; idx < bp::len(frames); idx++){ + const char* this_data = ((const char*)bp::extract(frames[idx])); + memcpy(next_frame.data, this_data, bp::len(frames[0])); + cvtColor(next_frame, next_gray, COLOR_BGR2GRAY); + + d_frame_0.upload(prev_gray); + d_frame_1.upload(next_gray); + + alg_tvl1->calc(d_frame_0, d_frame_1, d_flow); + + GpuMat planes[2]; + cuda::split(d_flow, planes); + planes[0].download(flow_x); + planes[1].download(flow_y); + + std::vector str_x, str_y; + + encodeFlowMap(flow_x, flow_y, str_x, str_y, bound_, false); + output.append( + bp::make_tuple( + bp::str((const char*) str_x.data(), str_x.size()), + bp::str((const char*) str_y.data(), str_y.size()) + ) + ); + + std::swap(prev_gray, next_gray); + } + return output; + }; +private: + int bound_; + GpuMat d_frame_0, d_frame_1; + GpuMat d_flow; + cv::Ptr alg_tvl1; +}; + + + +class TVL1WarpFlowExtractor { +public: + + TVL1WarpFlowExtractor(int bound) { + alg_tvl1 = cuda::OpticalFlowDual_TVL1::create(); + detector_surf = xfeatures2d::SurfFeatureDetector::create(200); + extractor_surf = xfeatures2d::SurfDescriptorExtractor::create(true, true); + bound_ = bound; + } + + static void set_device(int dev_id){ + setDevice(dev_id); + } + + bp::list extract_warp_flow(bp::list frames, int img_width, int img_height){ + bp::list output; + Mat input_frame, prev_frame, next_frame, prev_gray, next_gray, human_mask; + Mat flow_x, flow_y; + + // initialize the first frame + const char* first_data = ((const char*)bp::extract(frames[0])); + input_frame = Mat(img_height, img_width, CV_8UC3); + initializeMats(input_frame, prev_frame, prev_gray, next_frame, next_gray); + human_mask = Mat::ones(input_frame.size(), CV_8UC1); + + memcpy(prev_frame.data, first_data, bp::len(frames[0])); + cvtColor(prev_frame, prev_gray, COLOR_BGR2GRAY); + for (int idx = 1; idx < bp::len(frames); idx++){ + const char* this_data = ((const char*)bp::extract(frames[idx])); + memcpy(next_frame.data, this_data, bp::len(frames[0])); + cvtColor(next_frame, next_gray, COLOR_BGR2GRAY); + + d_frame_0.upload(prev_gray); + d_frame_1.upload(next_gray); + + alg_tvl1->calc(d_frame_0, d_frame_1, d_flow); + + GpuMat planes[2]; + cuda::split(d_flow, planes); + planes[0].download(flow_x); + planes[1].download(flow_y); + + // warp to reduce holistic motion + detector_surf->detect(next_gray, kpts_surf, human_mask); + extractor_surf->compute(next_gray, kpts_surf, desc_surf); + ComputeMatch(prev_kpts_surf, kpts_surf, 
prev_desc_surf, desc_surf, prev_pts_surf, pts_surf); + MatchFromFlow_copy(next_gray, flow_x, flow_y, prev_pts_flow, pts_flow, human_mask); + MergeMatch(prev_pts_flow, pts_flow, prev_pts_surf, pts_surf, prev_pts_all, pts_all); + Mat H = Mat::eye(3, 3, CV_64FC1); + if(pts_all.size() > 50) { + std::vector match_mask; + Mat temp = findHomography(prev_pts_all, pts_all, RANSAC, 1, match_mask); + if(cv::countNonZero(Mat(match_mask)) > 25) + H = temp; + } + + Mat H_inv = H.inv(); + Mat gray_warp = Mat::zeros(next_gray.size(), CV_8UC1); + MyWarpPerspective(prev_gray, next_gray, gray_warp, H_inv); + + d_frame_0.upload(prev_gray); + d_frame_1.upload(gray_warp); + + alg_tvl1->calc(d_frame_0, d_frame_1, d_flow); + + cuda::split(d_flow, planes); + planes[0].download(flow_x); + planes[1].download(flow_y); + + std::vector str_x, str_y; + + encodeFlowMap(flow_x, flow_y, str_x, str_y, bound_, false); + output.append( + bp::make_tuple( + bp::str((const char*) str_x.data(), str_x.size()), + bp::str((const char*) str_y.data(), str_y.size()) + ) + ); + + std::swap(prev_gray, next_gray); + } + return output; + } +private: + cv::Ptr detector_surf; + cv::Ptr extractor_surf; + std::vector prev_pts_flow, pts_flow; + std::vector prev_pts_surf, pts_surf; + std::vector prev_pts_all, pts_all; + std::vector prev_kpts_surf, kpts_surf; + Mat prev_desc_surf, desc_surf; + + GpuMat d_frame_0, d_frame_1; + GpuMat d_flow; + + cv::Ptr alg_tvl1; + int bound_; +}; + + +//// Boost Python Related Decl +BOOST_PYTHON_MODULE(libpydenseflow){ + bp::class_("TVL1FlowExtractor", bp::init()) + .def("extract_flow", &TVL1FlowExtractor::extract_flow) + .def("set_device", &TVL1FlowExtractor::set_device) + .staticmethod("set_device"); + bp::class_("TVL1WarpFlowExtractor", bp::init()) + .def("extract_warp_flow", &TVL1WarpFlowExtractor::extract_warp_flow) + .def("set_device", &TVL1WarpFlowExtractor::set_device) + .staticmethod("set_device"); +} diff --git a/src/zip_utils.cpp b/src/zip_utils.cpp new file mode 100644 index 0000000..32a7fa0 --- /dev/null +++ b/src/zip_utils.cpp @@ -0,0 +1,36 @@ +// +// Created by Yuanjun Xiong on 18/11/2015. 
+// + +#include "utils.h" +#include "zip.h" +#include "easylogging++.h" + +void writeZipFile(std::vector >& data, std::string name_temp, std::string archive_name){ + int err=0; +#ifdef USE_OBSEL_LIBZIP + struct zip* archive = zip_open(archive_name.c_str(), ZIP_CREATE, &err); +#else + struct zip* archive = zip_open(archive_name.c_str(), ZIP_CREATE|ZIP_TRUNCATE, &err); +#endif + + CHECK_EQ(err, 0)<<"Failed to open Zip file with error code: "<("vidFile"); + std::string xFlowFile = cmd.get("xFlowFile"); + std::string yFlowFile = cmd.get("yFlowFile"); + std::string imgFile = cmd.get("imgFile"); + std::string output_style = cmd.get("out"); + int bound = cmd.get("bound"); + int type = cmd.get("type"); + +// LOG(INFO)<<"Starting extraction"; + std::vector > out_vec_x, out_vec_y, out_vec_img; + + calcDenseFlow(vidFile, bound, type, 1, + out_vec_x, out_vec_y, out_vec_img); + + if (output_style == "dir") { + writeImages(out_vec_x, xFlowFile); + writeImages(out_vec_y, yFlowFile); + writeImages(out_vec_img, imgFile); + }else{ +// LOG(INFO)<<"Writing results to Zip archives"; + writeZipFile(out_vec_x, "x_%05d.jpg", xFlowFile+".zip"); + writeZipFile(out_vec_y, "y_%05d.jpg", yFlowFile+".zip"); + writeZipFile(out_vec_img, "img_%05d.jpg", imgFile+".zip"); + } + return 0; +} diff --git a/tools/extract_flow.py b/tools/extract_flow.py new file mode 100644 index 0000000..123ca9f --- /dev/null +++ b/tools/extract_flow.py @@ -0,0 +1,71 @@ + +import sys + +sys.path.append('build/') + +import os +from libpydenseflow import TVL1FlowExtractor, TVL1WarpFlowExtractor +import numpy as np + +class FlowExtractor(object): + + def __init__(self, dev_id, bound=20): + TVL1FlowExtractor.set_device(dev_id) + self._et = TVL1FlowExtractor(bound) + + def extract_flow(self, frame_list, new_size=None): + """ + This function extracts the optical flow and interleave x and y channels + :param frame_list: + :return: + """ + frame_size = frame_list[0].shape[:2] + rst = self._et.extract_flow([x.tostring() for x in frame_list], frame_size[1], frame_size[0]) + n_out = len(rst) + if new_size is None: + ret = np.zeros((n_out*2, frame_size[0], frame_size[1])) + for i in xrange(n_out): + ret[2*i, :] = np.fromstring(rst[i][0], dtype='uint8').reshape(frame_size) + ret[2*i+1, :] = np.fromstring(rst[i][1], dtype='uint8').reshape(frame_size) + else: + import cv2 + ret = np.zeros((n_out*2, new_size[1], new_size[0])) + for i in xrange(n_out): + ret[2*i, :] = cv2.resize(np.fromstring(rst[i][0], dtype='uint8').reshape(frame_size), new_size) + ret[2*i+1, :] = cv2.resize(np.fromstring(rst[i][1], dtype='uint8').reshape(frame_size), new_size) + + return ret + +def save_optical_flow(output_folder, flow_frames): + try: + os.mkdir(output_folder) + except OSError: + pass + nframes = len(flow_frames) / 2 + for i in xrange(nframes): + out_x = '{0}/x_{1:04d}.jpg'.format(output_folder, i+1) + out_y = '{0}/y_{1:04d}.jpg'.format(output_folder, i+1) + cv2.imwrite(out_x, flow_frames[2*i]) + cv2.imwrite(out_y, flow_frames[2*i+1]) + +if __name__ == "__main__": + if len(sys.argv) < 3: # TODO! 
argparse + print ("Missing arguments.\n" + "Usage: \n" + " python tools/action_flow.py ") + sys.exit(-1) + + input_video = sys.argv[1] + output_folder = sys.argv[2] + + import cv2 + if os.path.exists(input_video): + frame_list = [] + cap = cv2.VideoCapture(input_video) + ret, frame = cap.read() + while ret: + frame_list.append(frame) + ret, frame = cap.read() + f = FlowExtractor(dev_id=0) + flow_frames = f.extract_flow(frame_list) + save_optical_flow(output_folder, flow_frames) diff --git a/tools/extract_flow_gpu.cpp b/tools/extract_flow_gpu.cpp new file mode 100644 index 0000000..c7cea87 --- /dev/null +++ b/tools/extract_flow_gpu.cpp @@ -0,0 +1,55 @@ +#include "dense_flow.h" +#include "utils.h" + +INITIALIZE_EASYLOGGINGPP + +using namespace cv::cuda; + +int main(int argc, char** argv){ + // IO operation + const char* keys = + { + "{ f vidFile | ex2.avi | filename of video }" + "{ x xFlowFile | flow_x | filename of flow x component }" + "{ y yFlowFile | flow_y | filename of flow x component }" + "{ i imgFile | flow_i | filename of flow image}" + "{ b bound | 15 | specify the maximum of optical flow}" + "{ t type | 0 | specify the optical flow algorithm }" + "{ d device_id | 0 | set gpu id}" + "{ s step | 1 | specify the step for frame sampling}" + "{ o out | zip | output style}" + "{ w newWidth | 0 | output style}" + "{ h newHeight | 0 | output style}" + }; + + CommandLineParser cmd(argc, argv, keys); + std::string vidFile = cmd.get("vidFile"); + std::string xFlowFile = cmd.get("xFlowFile"); + std::string yFlowFile = cmd.get("yFlowFile"); + std::string imgFile = cmd.get("imgFile"); + std::string output_style = cmd.get("out"); + int bound = cmd.get("bound"); + int type = cmd.get("type"); + int device_id = cmd.get("device_id"); + int step = cmd.get("step"); + int new_height = cmd.get("newHeight"); + int new_width = cmd.get("newWidth"); + + std::vector > out_vec_x, out_vec_y, out_vec_img; + + calcDenseFlowGPU(vidFile, bound, type, step, device_id, + out_vec_x, out_vec_y, out_vec_img, new_width, new_height); + + if (output_style == "dir") { + writeImages(out_vec_x, xFlowFile); + writeImages(out_vec_y, yFlowFile); + writeImages(out_vec_img, imgFile); + }else{ +// LOG(INFO)<<"Writing results to Zip archives"; + writeZipFile(out_vec_x, "x_%05d.jpg", xFlowFile+".zip"); + writeZipFile(out_vec_y, "y_%05d.jpg", yFlowFile+".zip"); + writeZipFile(out_vec_img, "img_%05d.jpg", imgFile+".zip"); + } + + return 0; +} diff --git a/tools/extract_warp_flow_gpu.cpp b/tools/extract_warp_flow_gpu.cpp new file mode 100644 index 0000000..29af6cf --- /dev/null +++ b/tools/extract_warp_flow_gpu.cpp @@ -0,0 +1,47 @@ +#include "dense_flow.h" +#include "utils.h" + +INITIALIZE_EASYLOGGINGPP + +using namespace cv::cuda; + +int main(int argc, char** argv){ + // IO operation + const char* keys = + { + "{ f vidFile | ex2.avi | filename of video }" + "{ x xFlowFile | flow_x | filename of flow x component }" + "{ y yFlowFile | flow_y | filename of flow y component }" + "{ b bound | 15 | specify the maximum of optical flow}" + "{ t type | 0 | specify the optical flow algorithm }" + "{ d device_id | 0 | set gpu id}" + "{ s step | 1 | specify the step for frame sampling}" + "{ o out | zip | output style}" + }; + + CommandLineParser cmd(argc, argv, keys); + std::string vidFile = cmd.get("vidFile"); + std::string xFlowFile = cmd.get("xFlowFile"); + std::string yFlowFile = cmd.get("yFlowFile"); + std::string output_style = cmd.get("out"); + int bound = cmd.get("bound"); + int type = cmd.get("type"); + int device_id = 
cmd.get("device_id"); + int step = cmd.get("step"); + + std::vector > out_vec_x, out_vec_y; + + calcDenseWarpFlowGPU(vidFile, bound, type, step, device_id, + out_vec_x, out_vec_y); + + if (output_style == "dir") { + writeImages(out_vec_x, xFlowFile); + writeImages(out_vec_y, yFlowFile); + }else{ +// LOG(INFO)<<"Writing results to Zip archives"; + writeZipFile(out_vec_x, "x_%05d.jpg", xFlowFile+".zip"); + writeZipFile(out_vec_y, "y_%05d.jpg", yFlowFile+".zip"); + } + + return 0; +}