forked from yinguobing/head-pose-estimation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mark_detector.py
183 lines (147 loc) · 6.28 KB
/
mark_detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
"""Human facial landmark detector based on Convulutional Neural Network."""
import numpy as np
import tensorflow as tf
import cv2
class FaceDetector:
    """Detect human faces in an image with a Caffe model run by OpenCV's dnn."""

    def __init__(self,
                 dnn_proto_text='assets/deploy.prototxt',
                 dnn_model='assets/res10_300x300_ssd_iter_140000.caffemodel'):
        """Load the face detection network.

        Args:
            dnn_proto_text: path of the Caffe network definition file.
            dnn_model: path of the Caffe weights file.
        """
        self.face_net = cv2.dnn.readNetFromCaffe(dnn_proto_text, dnn_model)
        # Output of the latest get_faceboxes() call, stored as
        # [faceboxes, confidences]; None until a detection has been run.
        self.detection_result = None

    def get_faceboxes(self, image, threshold=0.5):
        """Get the bounding boxes of faces in image using dnn.

        Args:
            image: array whose shape unpacks as (rows, cols, channels).
            threshold: only detections whose confidence is strictly greater
                than this value are kept.

        Returns:
            A tuple (confidences, faceboxes) of two parallel lists. Every
            facebox is [x1, y1, x2, y2] in integer pixel coordinates. The
            values are not clipped, so a box may reach outside the image.

        Note:
            The same data is cached on self.detection_result, but in the
            opposite order: [faceboxes, confidences].
        """
        rows, cols, _ = image.shape
        confidences = []
        faceboxes = []

        # blobFromImage(image, scalefactor, size, mean, swapRB, crop)
        self.face_net.setInput(cv2.dnn.blobFromImage(
            image, 1.0, (300, 300), (104.0, 177.0, 123.0), False, False))
        detections = self.face_net.forward()

        for result in detections[0, 0, :, :]:
            confidence = result[2]
            if confidence > threshold:
                # result[3:7] are scaled by the image width / height, i.e.
                # they are treated as fractions of the image size.
                x1 = int(result[3] * cols)
                y1 = int(result[4] * rows)
                x2 = int(result[5] * cols)
                y2 = int(result[6] * rows)
                confidences.append(confidence)
                faceboxes.append([x1, y1, x2, y2])

        self.detection_result = [faceboxes, confidences]
        return confidences, faceboxes

    def draw_all_result(self, image):
        """Draw the latest detection result (boxes and scores) on image.

        The image is modified in place. Nothing is drawn when no detection
        has been run yet or when the latest detection found no face.

        Args:
            image: the image to draw on.
        """
        if self.detection_result is None:
            return

        # detection_result holds two parallel lists, so they have to be
        # paired up before iterating over individual detections.
        faceboxes, confidences = self.detection_result
        for facebox, conf in zip(faceboxes, confidences):
            cv2.rectangle(image, (facebox[0], facebox[1]),
                          (facebox[2], facebox[3]), (0, 255, 0))
            label = "face: %.4f" % conf
            label_size, base_line = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

            # Filled background behind the label so the text stays readable.
            cv2.rectangle(image, (facebox[0], facebox[1] - label_size[1]),
                          (facebox[0] + label_size[0],
                           facebox[1] + base_line),
                          (0, 255, 0), cv2.FILLED)
            cv2.putText(image, label, (facebox[0], facebox[1]),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
class MarkDetector:
    """Facial landmark detector by Convolutional Neural Network."""

    def __init__(self, mark_model='assets/frozen_inference_graph.pb'):
        """Create the face detector and load the frozen landmark model.

        Args:
            mark_model: path of the frozen TensorFlow graph (.pb file).
        """
        # A face detector is required for mark detection.
        self.face_detector = FaceDetector()

        self.cnn_input_size = 128
        # Initialised here; not written anywhere else in this class.
        self.marks = None

        # GraphDef, gfile and Session are TensorFlow 1.x symbols. TensorFlow
        # 2.x only exposes them under tf.compat.v1, so use that namespace
        # when it exists and fall back to the top-level module otherwise.
        tf_v1 = getattr(getattr(tf, 'compat', None), 'v1', tf)

        # Load the (frozen) TensorFlow model into its own graph and get a
        # session ready to do landmark detection.
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf_v1.GraphDef()
            with tf_v1.gfile.GFile(mark_model, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf_v1.import_graph_def(od_graph_def, name='')
        self.graph = detection_graph
        self.sess = tf_v1.Session(graph=detection_graph)

    @staticmethod
    def draw_box(image, boxes, box_color=(255, 255, 255)):
        """Draw every box of boxes on image, in place.

        Args:
            image: the image to draw on.
            boxes: iterable of boxes, each indexable as [x1, y1, x2, y2].
            box_color: color passed to cv2.rectangle.
        """
        for box in boxes:
            cv2.rectangle(image,
                          (box[0], box[1]),
                          (box[2], box[3]), box_color)

    @staticmethod
    def move_box(box, offset):
        """Move the box to the direction specified by vector offset.

        Args:
            box: [left_x, top_y, right_x, bottom_y].
            offset: [dx, dy], added to the x and y coordinates respectively.

        Returns:
            A new list [left_x, top_y, right_x, bottom_y]; box is not
            modified.
        """
        left_x = box[0] + offset[0]
        top_y = box[1] + offset[1]
        right_x = box[2] + offset[0]
        bottom_y = box[3] + offset[1]
        return [left_x, top_y, right_x, bottom_y]

    @staticmethod
    def get_square_box(box):
        """Get a square box out of the given box, by expanding it.

        The shorter side is grown symmetrically until it matches the longer
        one. When the difference is odd, the extra pixel goes to the right
        (slim box) or to the bottom (short box).

        Args:
            box: [left_x, top_y, right_x, bottom_y] with integer values.

        Returns:
            A new list [left_x, top_y, right_x, bottom_y] describing a
            square; box is never modified nor returned itself.
        """
        left_x = box[0]
        top_y = box[1]
        right_x = box[2]
        bottom_y = box[3]

        box_width = right_x - left_x
        box_height = bottom_y - top_y

        # Check if box is already a square. If not, make it a square.
        diff = box_height - box_width
        if diff == 0:
            # Already a square: hand back a copy so that every code path
            # returns a fresh list the caller may freely mutate.
            return [left_x, top_y, right_x, bottom_y]

        delta = int(abs(diff) / 2)
        # Python's % yields a non-negative result for a positive divisor
        # (e.g. -3 % 2 == 1), so this parity test holds for negative diff.
        extra = 1 if diff % 2 == 1 else 0

        if diff > 0:
            # Height > width, a slim box: widen it.
            left_x -= delta
            right_x += delta + extra
        else:
            # Width > height, a short box: make it taller.
            top_y -= delta
            bottom_y += delta + extra

        # Make sure box is always square.
        assert ((right_x - left_x) == (bottom_y - top_y)), 'Box is not square.'

        return [left_x, top_y, right_x, bottom_y]

    @staticmethod
    def box_in_image(box, image):
        """Check if the box lies completely inside the image.

        Args:
            box: [left_x, top_y, right_x, bottom_y].
            image: object whose shape starts with (rows, cols).

        Returns:
            True when no side of the box crosses the image border.
        """
        rows = image.shape[0]
        cols = image.shape[1]
        return box[0] >= 0 and box[1] >= 0 and box[2] <= cols and box[3] <= rows

    def extract_cnn_facebox(self, image):
        """Extract the face area from image.

        Faces are detected with a confidence threshold of 0.9. Each raw box
        is shifted down by half of the difference between its height and
        width, then expanded to a square.

        Args:
            image: the image handed to the face detector.

        Returns:
            The first squared box that fits entirely in the image, or None
            when there is no such box.
        """
        _, raw_boxes = self.face_detector.get_faceboxes(
            image=image, threshold=0.9)

        for box in raw_boxes:
            # Move box down.
            diff_height_width = (box[3] - box[1]) - (box[2] - box[0])
            offset_y = int(abs(diff_height_width / 2))
            box_moved = self.move_box(box, [0, offset_y])

            # Make box square.
            facebox = self.get_square_box(box_moved)

            if self.box_in_image(facebox, image):
                return facebox

        return None

    def detect_marks(self, image_np):
        """Detect marks from image.

        Args:
            image_np: value fed to the graph input 'input_image_tensor:0'.
                NOTE(review): presumably a face crop resized to
                cnn_input_size x cnn_input_size - confirm with the caller.

        Returns:
            A numpy array of shape (N, 2): the network output flattened and
            regrouped into pairs, one row per mark.
        """
        # Get result tensor by its name.
        logits_tensor = self.graph.get_tensor_by_name('logits/BiasAdd:0')

        # Actual detection.
        predictions = self.sess.run(
            logits_tensor,
            feed_dict={'input_image_tensor:0': image_np})

        # Convert predictions to landmarks.
        marks = np.array(predictions).flatten()
        marks = np.reshape(marks, (-1, 2))

        return marks

    @staticmethod
    def draw_marks(image, marks, color=(255, 255, 255)):
        """Draw mark points on image, in place.

        Args:
            image: the image to draw on.
            marks: iterable of points; mark[0] and mark[1] are truncated to
                int and used as the circle center.
            color: color passed to cv2.circle.
        """
        for mark in marks:
            # Radius 1 with thickness -1 draws a small filled dot.
            cv2.circle(image, (int(mark[0]), int(
                mark[1])), 1, color, -1, cv2.LINE_AA)