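# faceClassify.py
# Detects faces in a tiled grid-view meeting frame (viewInfo's Row x Column
# layout), OCRs the name label of each tile with Tesseract, and saves the
# cropped face into a per-name folder under `dataset`.
# Requires the Haar cascade XML, the res10 SSD Caffe model files, and a local
# Tesseract installation (paths are set below).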
import cv2
import os
import pytesseract
import difflib
import platform
import pandas as pd

def catchFaceAndClassify(dataset, name_lst, frame, num_frame, viewInfo, tmp_dict):
    # dispatch to a detector backend; the DNN detector is the default,
    # the Haar-cascade variant can be swapped in instead:
    # base_img, tmp_dict = opencv_haar_cascade(dataset, name_lst, frame, num_frame, viewInfo, tmp_dict)
    base_img, tmp_dict = opencv_dnn_classify(dataset, name_lst, frame, num_frame, viewInfo, tmp_dict)
    return base_img, tmp_dict

def opencv_haar_cascade(dataset, name_lst, frame, num_frame, viewInfo, tmp_dict):
    # OpenCV Haar-cascade face detection
    classifier = cv2.CascadeClassifier("./haarcascades/haarcascade_frontalface_default.xml")
    if platform.system() == 'Windows':
        pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files/Tesseract-OCR/tesseract.exe'
    elif platform.system() == 'Darwin':
        pytesseract.pytesseract.tesseract_cmd = '/usr/local/bin/tesseract'
    base_img = frame.copy()
    grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert to grayscale
    row = viewInfo.get('Row')
    column = viewInfo.get('Column')
    clip_width = int(viewInfo.get('Width') / row)       # width of one participant tile
    clip_height = int(viewInfo.get('Height') / column)  # height of one participant tile
    faceRects = classifier.detectMultiScale(grey, 1.1, 5, minSize=(8, 8))  # faces are returned as a list of rectangles
    if len(faceRects) > 0:
        for faceRect in faceRects:
            x, y, w, h = faceRect
            face_row = int(x / clip_width)
            face_col = int(y / clip_height)
            if w > clip_width or h > clip_height:  # skip boxes larger than one tile
                continue
            tmp_row = int((x + w) / clip_width)
            tmp_col = int((y + h) / clip_height)
            if tmp_row != face_row or tmp_col != face_col:  # skip boxes that span two tiles
                continue
            if (str(face_row), str(face_col)) in tmp_dict:
                # this tile was already OCRed within the current second: reuse the cached name
                historical_name = tmp_dict[(str(face_row), str(face_col))]
                clip_img = grey[max(y - 10, 0):y + h + 10, max(x - 10, 0):x + w + 10]  # clamp to avoid negative indices
                if clip_img.size != 0:
                    cv2.imwrite(dataset + '/' + historical_name + '/{0}.jpg'.format(num_frame), clip_img)
                cv2.rectangle(base_img, (x, y), (x + w, y + h), (246, 108, 61), 2)
            else:
                # crop the name label strip at the bottom-left of the tile
                cropped = grey[clip_height * (face_col + 1) - 32:clip_height * (face_col + 1),
                               clip_width * face_row:clip_width * face_row + 120]
                text = ''
                # try OCR at increasing scales until the result matches a known name
                for k in range(2, 8):
                    resized_text = cv2.resize(cropped, None, fx=k, fy=k)
                    # pixels with greyscale > 185 keep their value, the rest are zeroed, to sharpen the label for OCR
                    ret, thresh1 = cv2.threshold(resized_text, 185, 255, cv2.THRESH_TOZERO)
                    text = pytesseract.image_to_string(thresh1)  # OCR
                    text = ''.join([char for char in text if char.isalpha()])  # drop '\n', spaces and other non-letters
                    if text == '':
                        # first pass failed: retry OCR on the Canny edge map
                        thresh1 = cv2.Canny(image=thresh1, threshold1=80, threshold2=150)
                        text = pytesseract.image_to_string(thresh1)
                        text = ''.join([char for char in text if char.isalpha()])
                        if text == '':
                            continue
                        text = string_comparison(text, name_lst)
                        if text in name_lst:
                            break
                    else:
                        text = string_comparison(text, name_lst)
                        if text not in name_lst:
                            # fuzzy match failed: retry OCR on the Canny edge map
                            thresh1 = cv2.Canny(image=thresh1, threshold1=80, threshold2=150)
                            text = pytesseract.image_to_string(thresh1)
                            text = ''.join([char for char in text if char.isalpha()])
                            if text == '':
                                continue
                            text = string_comparison(text, name_lst)
                            if text in name_lst:
                                break
                        else:
                            break
                if text == '':
                    continue
                tmp_dict[(str(face_row), str(face_col))] = text  # cache the OCR result; the dict is refreshed every second
                try:
                    if text not in os.listdir(dataset) and text.isalpha() and text in name_lst:
                        os.makedirs("./" + dataset + "/" + text)
                except Exception as e:
                    print("frame number:", num_frame, e)
                clip_img = grey[max(y - 10, 0):y + h + 10, max(x - 10, 0):x + w + 10]
                if clip_img.size != 0:
                    cv2.imwrite(dataset + '/' + text + '/{0}.jpg'.format(num_frame), clip_img)
                cv2.rectangle(base_img, (x, y), (x + w, y + h), (246, 108, 61), 2)
    return base_img, tmp_dict

def opencv_dnn_classify(dataset, name_lst, frame, num_frame, viewInfo, tmp_dict):
    # OpenCV DNN face detection; (0, 0) is the upper-left corner, so right > left and bottom > top
    detector = cv2.dnn.readNetFromCaffe("deploy.prototxt.txt", "res10_300x300_ssd_iter_140000.caffemodel")
    base_img = frame.copy()
    grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert to grayscale
    row = viewInfo.get('Row')
    column = viewInfo.get('Column')
    clip_width = int(viewInfo.get('Width') / row)       # width of one participant tile
    clip_height = int(viewInfo.get('Height') / column)  # height of one participant tile
    original_size = frame.shape
    target_size = (300, 300)
    image = cv2.resize(frame, target_size)
    aspect_ratio_x = original_size[1] / target_size[1]  # scale factors back to the full-resolution frame
    aspect_ratio_y = original_size[0] / target_size[0]
    imageBlob = cv2.dnn.blobFromImage(image=image)
    detector.setInput(imageBlob)
    detections = detector.forward()
    # each detection row is [img_id, is_face, confidence, left, top, right, bottom],
    # with box coordinates normalised to [0, 1] relative to the 300x300 input
    detections_df = pd.DataFrame(detections[0][0],
                                 columns=["img_id", "is_face", "confidence", "left", "top", "right", "bottom"])
    detections_df = detections_df[detections_df['is_face'] == 1]  # 0: background, 1: face
    detections_df = detections_df[detections_df['confidence'] >= 0.15]
    for i, instance in detections_df.iterrows():
        confidence_score = str(round(100 * instance["confidence"], 2)) + " %"
        # optional overlay:
        # cv2.putText(base_img, confidence_score, (int(left * aspect_ratio_x), int(top * aspect_ratio_y - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        left = int(instance["left"] * 300)
        bottom = int(instance["bottom"] * 300)
        right = int(instance["right"] * 300)
        top = int(instance["top"] * 300)
        detected_face = image[top:bottom, left:right]  # low-resolution crop from the 300x300 input
        saved_face = base_img[int(top * aspect_ratio_y):int(bottom * aspect_ratio_y),
                              int(left * aspect_ratio_x):int(right * aspect_ratio_x)]  # full-resolution crop
        # high-resolution alternative:
        # detected_face = base_img[int(top * aspect_ratio_y):int(bottom * aspect_ratio_y), int(left * aspect_ratio_x):int(right * aspect_ratio_x)]
        if detected_face.shape[0] > 0 and detected_face.shape[1] > 0:
            face_row = int(int(left * aspect_ratio_x) / clip_width)
            face_col = int(int(top * aspect_ratio_y) / clip_height)
            # skip boxes larger than one tile (width against clip_width, height against clip_height)
            if ((right - left) * aspect_ratio_x) > clip_width or ((bottom - top) * aspect_ratio_y) > clip_height:
                continue
            tmp_row = int(int(right * aspect_ratio_x) / clip_width)
            tmp_col = int(int(bottom * aspect_ratio_y) / clip_height)
            if tmp_row != face_row or tmp_col != face_col:  # skip boxes that span two tiles
                continue
            if (str(face_row), str(face_col)) in tmp_dict:
                # this tile was already OCRed within the current second: reuse the cached name
                historical_name = tmp_dict[(str(face_row), str(face_col))]
                if saved_face.size != 0:
                    written_img = cv2.cvtColor(saved_face, cv2.COLOR_BGR2GRAY)
                    cv2.imwrite(dataset + '/' + historical_name + '/{0}.jpg'.format(num_frame), written_img)
                cv2.rectangle(base_img, (int(left * aspect_ratio_x), int(top * aspect_ratio_y)),
                              (int(right * aspect_ratio_x), int(bottom * aspect_ratio_y)), (246, 108, 61), 2)
            else:
                # crop the name label strip at the bottom-left of the tile
                cropped = grey[clip_height * (face_col + 1) - 32:clip_height * (face_col + 1),
                               clip_width * face_row:clip_width * face_row + 120]
                text = ''
                # try OCR at increasing scales until the result matches a known name
                for k in range(2, 8):
                    resized_text = cv2.resize(cropped, None, fx=k, fy=k)
                    # pixels with greyscale > 185 keep their value, the rest are zeroed, to sharpen the label for OCR
                    ret, thresh1 = cv2.threshold(resized_text, 185, 255, cv2.THRESH_TOZERO)
                    text = pytesseract.image_to_string(thresh1)  # OCR
                    text = ''.join([char for char in text if char.isalpha()])  # drop '\n', spaces and other non-letters
                    if text == '':
                        # first pass failed: retry OCR on the Canny edge map
                        thresh1 = cv2.Canny(image=thresh1, threshold1=80, threshold2=150)
                        text = pytesseract.image_to_string(thresh1)
                        text = ''.join([char for char in text if char.isalpha()])
                        if text == '':
                            continue
                        text = string_comparison(text, name_lst)
                        if text in name_lst:
                            break
                    else:
                        text = string_comparison(text, name_lst)
                        if text not in name_lst:
                            # fuzzy match failed: retry OCR on the Canny edge map
                            thresh1 = cv2.Canny(image=thresh1, threshold1=80, threshold2=150)
                            text = pytesseract.image_to_string(thresh1)
                            text = ''.join([char for char in text if char.isalpha()])
                            if text == '':
                                continue
                            text = string_comparison(text, name_lst)
                            if text in name_lst:
                                break
                        else:
                            break
                if text == '':
                    continue
                tmp_dict[(str(face_row), str(face_col))] = text  # cache the OCR result; the dict is refreshed every second
                try:
                    if text not in os.listdir(dataset) and text.isalpha():
                        os.makedirs("./" + dataset + "/" + text)
                except Exception as e:
                    print("frame number:", num_frame, e)
                if saved_face.size != 0:
                    written_img = cv2.cvtColor(saved_face, cv2.COLOR_BGR2GRAY)
                    cv2.imwrite(dataset + '/' + text + '/{0}.jpg'.format(num_frame), written_img)
                cv2.rectangle(base_img, (int(left * aspect_ratio_x), int(top * aspect_ratio_y)),
                              (int(right * aspect_ratio_x), int(bottom * aspect_ratio_y)), (246, 108, 61), 2)
    return base_img, tmp_dict

def string_comparison(text, name_lst):
    # smooth out small OCR variations by fuzzy-matching against the known name list
    lst = difflib.get_close_matches(text, name_lst, n=1, cutoff=0.75)
    if lst:
        return lst[0]
    return text
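
# e.g. string_comparison('Alise', ['Alice', 'Bob']) returns 'Alice'
# (difflib similarity 0.8 >= the 0.75 cutoff); an unmatched string is returned unchanged.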

# Timing notes, 5-minute test video:
#   Classifying takes ~140 min when every frame is OCRed; once one frame's OCR
#   succeeds, the other 24 frames in that second reuse the same text, giving:
#   opencv dnn:  19m29s (ocr_period=1), 17m01s (ocr_period=2), 15m43s (ocr_period=3)
#   opencv haar: 48m03s (ocr_period=1), 41m40s (ocr_period=2), 37m18s (ocr_period=3)
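
if __name__ == '__main__':
    # Minimal driver sketch (not part of the original file): the video path,
    # grid layout, and participant names below are placeholder assumptions.
    dataset = 'dataset'
    name_lst = ['Alice', 'Bob']
    os.makedirs(dataset, exist_ok=True)
    cap = cv2.VideoCapture('meeting.mp4')  # hypothetical grid-view recording
    viewInfo = {'Row': 2, 'Column': 2,     # assumed 2x2 participant grid
                'Width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                'Height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))}
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 25
    tmp_dict = {}
    num_frame = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if num_frame % fps == 0:
            tmp_dict = {}  # refresh the cached OCR names once per second
        annotated, tmp_dict = catchFaceAndClassify(dataset, name_lst, frame,
                                                   num_frame, viewInfo, tmp_dict)
        num_frame += 1
    cap.release()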