add the option to process and save video or videos #40

Open · wants to merge 1 commit into master
14 changes: 11 additions & 3 deletions README.md
@@ -58,13 +58,21 @@ python detect.py --images imgs --det det --reso 320
```

### On Video
For this, you should run the file video_demo.py with the --video flag specifying the video file. The video file should be in .avi format,
since OpenCV only accepts it as the input format. Support for saving the processed video has been added via the --output flag, and the --noshow flag stops OpenCV from displaying frames while the video is processed.

```
python video_demo.py --video video.avi
```

Add the --output flag and/or the --noshow flag as follows:
```
python video_demo.py --video video.avi --output output --noshow True
```
The processed video is saved as result_<video name> inside the directory given by --output.
Furthermore, you can run the file video_demo_file.py with the same flags as video_demo.py to process multiple videos and write their results in parallel:
```
python video_demo_file.py --videos video --output output --noshow True
```
Note that the --videos flag expects a directory containing the input videos.
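
For illustration, assuming the input directory is named `video` as in the command above (the file names below are invented), the expected layout is simply a folder of .avi files:
```
video/
├── clip1.avi
└── clip2.avi
```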
Tweakable settings can be seen with the -h flag.

### Speeding up Video Inference
133 changes: 78 additions & 55 deletions video_demo.py
@@ -1,37 +1,37 @@
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
from darknet import Darknet
from preprocess import prep_image, inp_to_image, letterbox_image
import pandas as pd
import random
import pickle as pkl
import argparse


def get_test_input(input_dim, CUDA):
    img = cv2.imread("dog-cycle-car.png")
    img = cv2.resize(img, (input_dim, input_dim))
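    # BGR -> RGB and HWC -> CHW for PyTorch, add a batch axis, scale to [0, 1]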
    img_ = img[:,:,::-1].transpose((2,0,1))
    img_ = img_[np.newaxis,:,:,:]/255.0
    img_ = torch.from_numpy(img_).float()
    img_ = Variable(img_)

    if CUDA:
        img_ = img_.cuda()

    return img_

def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.

    Returns a Variable
    """

    orig_im = img
@@ -57,27 +57,33 @@ def write(x, img):
def arg_parse():
    """
    Parse arguments to the detect module

    """

    parser = argparse.ArgumentParser(description='YOLO v3 Video Detection Module')

    parser.add_argument("--video", dest = 'video', help =
                        "Video to run detection upon",
                        default = "video.avi", type = str)
    parser.add_argument("--dataset", dest = "dataset", help = "Dataset on which the network has been trained", default = "pascal")
    parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
    parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshold", default = 0.4)
    parser.add_argument("--cfg", dest = 'cfgfile', help =
                        "Config file",
                        default = "cfg/yolov3.cfg", type = str)
    parser.add_argument("--weights", dest = 'weightsfile', help =
                        "weightsfile",
                        default = "yolov3.weights", type = str)
    parser.add_argument("--reso", dest = 'reso', help =
                        "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default = "416", type = str)
    parser.add_argument("--output", dest = 'output', help =
                        "video output dir",
                        default = "output", type = str)
parser.add_argument("--noshow", dest = 'noshow', help =
"wether show frame",
default = False, type = bool)
return parser.parse_args()


@@ -87,100 +93,117 @@ def arg_parse():
nms_thesh = float(args.nms_thresh)
start = 0

num_classes = 80

CUDA = torch.cuda.is_available()

bbox_attrs = 5 + num_classes

print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32

if CUDA:
    model.cuda()

model(get_test_input(inp_dim, CUDA), CUDA)

model.eval()

videofile = args.video

cap = cv2.VideoCapture(videofile)

FRAME_WIDTH = cap.get(3)                       # cv2.CAP_PROP_FRAME_WIDTH
FRAME_HEIGHT = cap.get(4)                      # cv2.CAP_PROP_FRAME_HEIGHT
FRAME_FPS = cap.get(5)                         # cv2.CAP_PROP_FPS
FRAME_FOURCC = cap.get(6)                      # cv2.CAP_PROP_FOURCC, as a float
FRAME_FOURCC_1 = cap.get(cv2.CAP_PROP_FOURCC)  # same value, via the named constant
print(FRAME_WIDTH, FRAME_HEIGHT, FRAME_FPS, FRAME_FOURCC, FRAME_FOURCC_1)
output_file = args.output + '/result_' + args.video.split('/')[-1]
out = cv2.VideoWriter(output_file, int(FRAME_FOURCC), FRAME_FPS, (int(FRAME_WIDTH), int(FRAME_HEIGHT)))
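# Note (assumption worth checking): cap.get(cv2.CAP_PROP_FOURCC) can return 0
# for some sources, in which case the writer may fail to open; a fallback such
# as cv2.VideoWriter_fourcc(*'XVID') would then be needed. OpenCV also does not
# create the output directory, so the --output dir must already exist.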



assert cap.isOpened(), 'Cannot capture source'

frames = 0
start = time.time()
start_time = time.time()
while cap.isOpened():

    ret, frame = cap.read()
    if ret:

        img, orig_im, dim = prep_image(frame, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1,2)

        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        with torch.no_grad():
            output = model(Variable(img), CUDA)
        output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)
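        # write_results returns a plain int (rather than a detection tensor)
        # when nothing survives the confidence and NMS thresholds; the branch
        # below handles that case.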

        if type(output) == int:
            frames += 1
            # print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
            # print('============================================================')
            if not args.noshow:
                cv2.imshow("frame", orig_im)
            if args.output is not None:
                out.write(orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            continue

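        # The network saw a letterboxed, resized copy of the frame, so map the
        # detections back to original-frame coordinates: undo the padding
        # offset, divide out the scale, then clamp boxes to the frame.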
        im_dim = im_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(inp_dim/im_dim,1)[0].view(-1,1)

        output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim[:,0].view(-1,1))/2
        output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim[:,1].view(-1,1))/2

        output[:,1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim[i,0])
            output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim[i,1])

        classes = load_classes('data/coco.names')
        colors = pkl.load(open("pallete", "rb"))

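        # write() (defined in the collapsed hunk above) draws each detection's
        # box and class label onto orig_im in place.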
        list(map(lambda x: write(x, orig_im), output))

        if not args.noshow:
            cv2.imshow("frame", orig_im)
        if args.output is not None:
            out.write(orig_im)

        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        frames += 1
        # print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))

    else:
        break

# fourcc = cv2.VideoWriter_fourcc(*'XVID')
cap.release()
out.release()
end_time = time.time()
print ("time: {}".format(str(end_time-start_time)))
