add the option to process and save video or videos #40

Open · wants to merge 1 commit into master
14 changes: 11 additions & 3 deletions README.md
@@ -58,13 +58,21 @@ python detect.py --images imgs --det det --reso 320
```

### On Video
For this, you should run the file video_demo.py with the --video flag specifying the video file. The video file should be in .avi format,
since OpenCV only accepts it as the input format. Support for saving the processed video has been added via the --output flag, and the --noshow flag stops OpenCV from displaying frames while the video is processed.

```
python video_demo.py --video video.avi
```

Add the --output flag and/or the --noshow flag as follows:
```
python video_demo.py --video video.avi --output output --noshow True
```
The processed video is saved as result_<video name> inside the directory given by --output.
Furthermore, you can run the file video_demo_file.py with the same flags as video_demo.py to process multiple videos and write their results in parallel:
```
python video_demo_file.py --videos video --output output --noshow True
```
Note that the --videos flag expects a directory containing the input videos.
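
For illustration, assuming the input directory is named `video` as in the command above (the file names below are invented), the expected layout is simply a folder of .avi files:
```
video/
├── clip1.avi
└── clip2.avi
```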
Tweakable settings can be seen with the -h flag.

### Speeding up Video Inference
133 changes: 78 additions & 55 deletions video_demo.py
@@ -1,37 +1,37 @@
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
from darknet import Darknet
from preprocess import prep_image, inp_to_image, letterbox_image
import pandas as pd
import random
import pickle as pkl
import argparse


def get_test_input(input_dim, CUDA):
    img = cv2.imread("dog-cycle-car.png")
    img = cv2.resize(img, (input_dim, input_dim))
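    # BGR -> RGB and HWC -> CHW for PyTorch, add a batch axis, scale to [0, 1]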
    img_ = img[:,:,::-1].transpose((2,0,1))
    img_ = img_[np.newaxis,:,:,:]/255.0
    img_ = torch.from_numpy(img_).float()
    img_ = Variable(img_)

    if CUDA:
        img_ = img_.cuda()

    return img_

def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.

    Returns a Variable
    """

    orig_im = img
@@ -57,27 +57,33 @@ def write(x, img):
def arg_parse():
    """
    Parse arguments to the detect module

    """

    parser = argparse.ArgumentParser(description='YOLO v3 Video Detection Module')

    parser.add_argument("--video", dest = 'video', help =
                        "Video to run detection upon",
                        default = "video.avi", type = str)
    parser.add_argument("--dataset", dest = "dataset", help = "Dataset on which the network has been trained", default = "pascal")
    parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)
    parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshold", default = 0.4)
    parser.add_argument("--cfg", dest = 'cfgfile', help =
                        "Config file",
                        default = "cfg/yolov3.cfg", type = str)
    parser.add_argument("--weights", dest = 'weightsfile', help =
                        "weightsfile",
                        default = "yolov3.weights", type = str)
    parser.add_argument("--reso", dest = 'reso', help =
                        "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default = "416", type = str)
    parser.add_argument("--output", dest = 'output', help =
                        "video output dir",
                        default = "output", type = str)
parser.add_argument("--noshow", dest = 'noshow', help =
"wether show frame",
default = False, type = bool)
return parser.parse_args()


@@ -87,100 +93,117 @@ def arg_parse():
nms_thesh = float(args.nms_thresh)
start = 0

num_classes = 80

CUDA = torch.cuda.is_available()

bbox_attrs = 5 + num_classes

print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32

if CUDA:
    model.cuda()

model(get_test_input(inp_dim, CUDA), CUDA)

model.eval()

videofile = args.video

cap = cv2.VideoCapture(videofile)

FRAME_WIDTH = cap.get(3)                       # cv2.CAP_PROP_FRAME_WIDTH
FRAME_HEIGHT = cap.get(4)                      # cv2.CAP_PROP_FRAME_HEIGHT
FRAME_FPS = cap.get(5)                         # cv2.CAP_PROP_FPS
FRAME_FOURCC = cap.get(6)                      # cv2.CAP_PROP_FOURCC, as a float
FRAME_FOURCC_1 = cap.get(cv2.CAP_PROP_FOURCC)  # same value, via the named constant
print(FRAME_WIDTH, FRAME_HEIGHT, FRAME_FPS, FRAME_FOURCC, FRAME_FOURCC_1)
output_file = args.output + '/result_' + args.video.split('/')[-1]
out = cv2.VideoWriter(output_file, int(FRAME_FOURCC), FRAME_FPS, (int(FRAME_WIDTH), int(FRAME_HEIGHT)))
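# Note (assumption worth checking): cap.get(cv2.CAP_PROP_FOURCC) can return 0
# for some sources, in which case the writer may fail to open; a fallback such
# as cv2.VideoWriter_fourcc(*'XVID') would then be needed. OpenCV also does not
# create the output directory, so the --output dir must already exist.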



assert cap.isOpened(), 'Cannot capture source'

frames = 0
start = time.time()
start_time = time.time()
while cap.isOpened():

    ret, frame = cap.read()
    if ret:

        img, orig_im, dim = prep_image(frame, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1,2)

        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        with torch.no_grad():
            output = model(Variable(img), CUDA)
        output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)
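        # write_results returns a plain int (rather than a detection tensor)
        # when nothing survives the confidence and NMS thresholds; the branch
        # below handles that case.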

        if type(output) == int:
            frames += 1
            # print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
            # print('============================================================')
            if not args.noshow:
                cv2.imshow("frame", orig_im)
            if args.output is not None:
                out.write(orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            continue

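        # The network saw a letterboxed, resized copy of the frame, so map the
        # detections back to original-frame coordinates: undo the padding
        # offset, divide out the scale, then clamp boxes to the frame.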
        im_dim = im_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(inp_dim/im_dim,1)[0].view(-1,1)

        output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim[:,0].view(-1,1))/2
        output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim[:,1].view(-1,1))/2

        output[:,1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim[i,0])
            output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim[i,1])

        classes = load_classes('data/coco.names')
        colors = pkl.load(open("pallete", "rb"))

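        # write() (defined in the collapsed hunk above) draws each detection's
        # box and class label onto orig_im in place.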
        list(map(lambda x: write(x, orig_im), output))

        if not args.noshow:
            cv2.imshow("frame", orig_im)
        if args.output is not None:
            out.write(orig_im)

        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        frames += 1
        # print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))

    else:
        break

# fourcc = cv2.VideoWriter_fourcc(*'XVID')
cap.release()
out.release()
end_time = time.time()
print ("time: {}".format(str(end_time-start_time)))
