# detect_video.py
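"""Real-time YOLOv3 object detection on a video file or webcam stream.

Draws the detections on each frame, optionally writes the annotated video to
disk, and plays an audible alarm whenever a person is detected.
"""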
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import tensorflow as tf
from yolov3_tf2.models import YoloV3, YoloV3Tiny
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import draw_outputs
import datetime
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
flags.DEFINE_string('classes', './data/labels/coco.names', 'path to classes file')
flags.DEFINE_string('weights', './weights/yolov3.tf', 'path to weights file')
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
flags.DEFINE_integer('size', 416, 'resize images to')
flags.DEFINE_string('video', './data/video/paris.mp4',
                    'path to video file or number for webcam')
flags.DEFINE_string('output', None, 'path to output video')
flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter when saving video to file')
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
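
# Example invocations (a sketch: assumes the Darknet weights have already been
# converted to the ./weights/yolov3.tf default used above, and that "1.wav"
# exists in the working directory):
#   python detect_video.py --video 0                                # webcam 0
#   python detect_video.py --video ./data/video/paris.mp4           # video file
#   python detect_video.py --video ./data/video/paris.mp4 --output ./out.avi
#   python detect_video.py --tiny --weights ./weights/yolov3-tiny.tf --video 0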

# alert sound, played whenever a person is detected ("1.wav" is loaded from the working directory)
song = AudioSegment.from_wav("1.wav")

def main(_argv):
    # let TensorFlow grow GPU memory on demand instead of claiming it all up front
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # build the requested model variant and load the converted weights
    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')
    # a numeric --video argument selects a webcam index; anything else is a file path
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None
    if FLAGS.output:
        # VideoCapture returns these properties as float; VideoWriter expects int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            # tolerate a few dropped frames before treating the stream as finished
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        # OpenCV reads BGR; the model expects an RGB batch resized to FLAGS.size
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        # outputs are batched: index boxes/scores/classes as [0][i]; nums[0] is the detection count
        boxes, scores, classes, nums = yolo.predict(img_in)
        fps = (fps + (1. / (time.time() - t1))) / 2  # smoothed frames-per-second estimate

        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        if FLAGS.output:
            out.write(img)
        cv2.imshow('output', img)
        # cv2.resizeWindow('output', 300, 300)

        print("the number of objects: {}".format(int(nums[0])))  # number of detected objects
        print(datetime.datetime.now())  # time of the detections
        print('detected objects: ')  # class, score, and box of each object
        for i in range(nums[0]):
            print('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                        np.array(scores[0][i]),
                                        np.array(boxes[0][i])))
            if class_names[int(classes[0][i])] == "person":  # filter for humans
                print("yes, human detected")
                play(song)  # blocking call; see the note after main() for a non-blocking sketch
        print("-------------")

        if cv2.waitKey(1) == ord('q'):
            break

    # free the capture, the writer (if any), and the display window
    vid.release()
    if out:
        out.release()
    cv2.destroyAllWindows()
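
# Note: pydub's play() above blocks the detection loop until the alarm clip
# finishes. A minimal non-blocking alternative (an assumption, not part of the
# original script) is to play the clip on a daemon thread:
#
#   import threading
#   threading.Thread(target=play, args=(song,), daemon=True).start()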

if __name__ == '__main__':
    try:
        app.run(main)
    except SystemExit:
        pass
# Nima