asdasdasdasd committed on
Commit
c794a89
·
1 Parent(s): 6a66599

Upload landmark_utils.py

Files changed (1)
  1. landmark_utils.py +309 -0
landmark_utils.py ADDED
@@ -0,0 +1,309 @@
from tqdm import tqdm
import numpy as np
import dlib
from collections import OrderedDict
import cv2

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
FACIAL_LANDMARKS_68_IDXS = OrderedDict([
    ("mouth", (48, 68)),
    ("inner_mouth", (60, 68)),
    ("right_eyebrow", (17, 22)),
    ("left_eyebrow", (22, 27)),
    ("right_eye", (36, 42)),
    ("left_eye", (42, 48)),
    ("nose", (27, 36)),
    ("jaw", (0, 17))
])
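# Each (start, end) pair is an end-exclusive index range into the standard
# 68-point landmark annotation used by dlib's shape predictor, so e.g.
# shape[48:68] selects the mouth points.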


def shape_to_face(shape, width, height, scale=1.2):
    """
    Recalculate the face bounding box based on the coarse landmark locations (shape).
    :param shape: landmark locations
    :param width: frame width
    :param height: frame height
    :param scale: scale factor used to enlarge the bounding box
    :return:
        face_new: new bounding box of the face (1*4 list [x1, y1, x2, y2])
        face_size: the face crop is square (width = height = face_size) (int)
    """
    x_min, y_min = np.min(shape, axis=0)
    x_max, y_max = np.max(shape, axis=0)

    x_center = (x_min + x_max) // 2
    y_center = (y_min + y_max) // 2

    face_size = int(max(x_max - x_min, y_max - y_min) * scale)
    # Force face_size to be even, so that after cropping the face size
    # stays equal to the face_size parameter.
    face_size = face_size // 2 * 2

    x1 = max(x_center - face_size // 2, 0)
    y1 = max(y_center - face_size // 2, 0)

    face_size = min(width - x1, face_size)
    face_size = min(height - y1, face_size)

    x2 = x1 + face_size
    y2 = y1 + face_size

    face_new = [int(x1), int(y1), int(x2), int(y2)]
    return face_new, face_size
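
# A hypothetical worked example of the logic above: for landmarks spanning
# x in [100, 300] and y in [120, 280] in a sufficiently large frame, the box
# is 200 wide and 160 tall, so face_size = int(200 * 1.2) = 240 (already
# even), and the crop is the 240x240 square centered on (200, 200), i.e.
# face_new = [80, 80, 320, 320].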


def predict_single_frame(frame):
    """
    :param frame: a full frame of the video
    :return:
        face_num: the number of detected faces (used to verify that a face was found)
        shape: landmark locations
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = detector(gray, 0)  # 0 = no upsampling before detection
    if len(faces) < 1:
        return 0, None
    face = faces[0]

    landmarks = predictor(frame, face)
    face_landmark_list = [(p.x, p.y) for p in landmarks.parts()]
    shape = np.array(face_landmark_list)

    return 1, shape


def landmark_align(shape):
    desiredLeftEye = (0.35, 0.25)
    desiredFaceWidth = 2
    desiredFaceHeight = 2
    (lStart, lEnd) = FACIAL_LANDMARKS_68_IDXS["left_eye"]
    (rStart, rEnd) = FACIAL_LANDMARKS_68_IDXS["right_eye"]

    leftEyePts = shape[lStart:lEnd]
    rightEyePts = shape[rStart:rEnd]

    # compute the center of mass for each eye
    leftEyeCenter = leftEyePts.mean(axis=0)  # .astype("int")
    rightEyeCenter = rightEyePts.mean(axis=0)  # .astype("int")

    # compute the angle between the eye centroids
    dY = rightEyeCenter[1] - leftEyeCenter[1]
    dX = rightEyeCenter[0] - leftEyeCenter[0]
    angle = np.degrees(np.arctan2(dY, dX))  # - 180

    # compute the desired right eye x-coordinate based on the
    # desired x-coordinate of the left eye
    desiredRightEyeX = 1.0 - desiredLeftEye[0]

    # determine the scale of the new resulting image by taking
    # the ratio of the distance between the eyes in the *current*
    # image to the distance between the eyes in the *desired* image
    dist = np.sqrt((dX ** 2) + (dY ** 2))
    desiredDist = (desiredRightEyeX - desiredLeftEye[0])
    desiredDist *= desiredFaceWidth
    scale = desiredDist / dist

    # compute the center (x, y)-coordinates (i.e., the midpoint)
    # between the two eyes in the input image
    eyesCenter = ((leftEyeCenter[0] + rightEyeCenter[0]) // 2,
                  (leftEyeCenter[1] + rightEyeCenter[1]) // 2)

    # grab the rotation matrix for rotating and scaling the face
    M = cv2.getRotationMatrix2D(eyesCenter, angle, scale)

    # update the translation component of the matrix
    tX = 0  # desiredFaceWidth * 0.5
    tY = desiredFaceHeight * desiredLeftEye[1]
    M[0, 2] += (tX - eyesCenter[0])
    M[1, 2] += (tY - eyesCenter[1])

    # apply the affine transform to the landmarks in homogeneous coordinates
    n, d = shape.shape
    temp = np.zeros((n, d + 1), dtype="int")
    temp[:, 0:2] = shape
    temp[:, 2] = 1
    aligned_landmarks = np.matmul(M, temp.T)
    return aligned_landmarks.T  # .astype("int"))
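
# For reference, cv2.getRotationMatrix2D(c, angle, s) builds the 2x3
# similarity transform
#     [[ s*cos(a),  s*sin(a), (1 - s*cos(a))*c_x - s*sin(a)*c_y ],
#      [-s*sin(a),  s*cos(a), s*sin(a)*c_x + (1 - s*cos(a))*c_y ]]
# so applying it to the homogeneous landmarks rotates the eyes onto a
# horizontal line and scales the inter-eye distance to desiredDist; the
# manual edits to M[0, 2] and M[1, 2] then translate the eye midpoint to
# the desired position in the 2x2 output coordinate frame.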


def check_and_merge(location, forward, feedback, P_predict, status_fw=None, status_fb=None):
    num_pts = 68
    check = [True] * num_pts

    target = location[1]
    forward_predict = forward[1]

    # To ensure robustness through the feedback check
    forward_base = forward[0]  # Also equal to location[0]
    feedback_predict = feedback[0]
    feedback_diff = feedback_predict - forward_base
    feedback_dist = np.linalg.norm(feedback_diff, axis=1, keepdims=True)

    # For Kalman filtering
    detect_diff = location[1] - location[0]
    detect_dist = np.linalg.norm(detect_diff, axis=1, keepdims=True)
    predict_diff = forward[1] - forward[0]
    predict_dist = np.linalg.norm(predict_diff, axis=1, keepdims=True)
    predict_dist[np.where(predict_dist == 0)] = 1  # Avoid division by zero
    P_detect = (detect_dist / predict_dist).reshape(num_pts)

    for ipt in range(num_pts):
        if feedback_dist[ipt] > 2:  # When using floats
            check[ipt] = False

    if status_fw is not None and np.sum(status_fw) != num_pts:
        for ipt in range(num_pts):
            if status_fw[ipt][0] == 0:
                check[ipt] = False
    if status_fb is not None and np.sum(status_fb) != num_pts:
        for ipt in range(num_pts):
            if status_fb[ipt][0] == 0:
                check[ipt] = False

    location_merge = target.copy()
    # Merge the results:
    # use a per-point Kalman filter to combine the tracked (predicted)
    # result with the detected result.
    Q = 0.3  # Process variance

    for ipt in range(num_pts):
        if check[ipt]:
            # Kalman update for this point
            P_predict[ipt] += Q
            K = P_predict[ipt] / (P_predict[ipt] + P_detect[ipt])
            location_merge[ipt] = forward_predict[ipt] + K * (target[ipt] - forward_predict[ipt])
            # Update P_predict with the current gain K
            P_predict[ipt] = (1 - K) * P_predict[ipt]
    return location_merge, check, P_predict
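
# The merge loop above is a per-point scalar Kalman update with process
# variance Q and a measurement variance P_detect estimated from the ratio
# of detected to predicted displacement:
#     P <- P + Q
#     K  = P / (P + P_detect)
#     x  = x_pred + K * (z - x_pred)
#     P <- (1 - K) * P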


def detect_frames_track(frames, fps, video):
    frames_num = len(frames)
    assert frames_num != 0
    frame_height, frame_width = frames[0].shape[:2]
    """
    Pre-process:
    detect the original results, and normalize each face to a fixed width,
    together with its corresponding landmark locations and scale parameter.
    """
    face_size_normalized = 400
    faces = []
    locations = []
    shapes_origin = []
    shapes_para = []  # Used to recover the shape in the whole frame ([x1, y1, scale_shape])
    face_size = 0
    skipped = 0

    """
    Use single-frame face detection with dlib (CPU).
    """
    # ----------------------------------------------------------------------------#

    print("Detecting:")
    for i in tqdm(range(frames_num)):
        frame = frames[i]
        face_num, shape = predict_single_frame(frame)

        if face_num == 0:
            # No face detected: reuse the most recent shape, or skip the
            # frame if no shape has been detected yet.
            if len(shapes_origin) == 0:
                skipped += 1
                # print("Skipped", skipped, "Frame_num", frames_num)
                continue
            shape = shapes_origin[i - 1 - skipped]

        face, face_size = shape_to_face(shape, frame_width, frame_height, 1.2)
        faceFrame = frame[face[1]:face[3], face[0]:face[2]]
        if face_size < face_size_normalized:
            inter_para = cv2.INTER_CUBIC
        else:
            inter_para = cv2.INTER_AREA
        face_norm = cv2.resize(faceFrame, (face_size_normalized, face_size_normalized), interpolation=inter_para)
        scale_shape = face_size_normalized / face_size
        shape_norm = np.rint((shape - np.array([face[0], face[1]])) * scale_shape).astype(int)
        faces.append(face_norm)
        shapes_para.append([face[0], face[1], scale_shape])
        shapes_origin.append(shape)
        locations.append(shape_norm)

    """
    Calibration module.
    """
    segment_length = 2
    locations_sum = len(locations)
    if locations_sum == 0:
        return []
    locations_track = [locations[0]]
    num_pts = 68
    P_predict = np.array([0] * num_pts).reshape(num_pts).astype(float)
    print("Tracking:")
    for i in tqdm(range(locations_sum - 1)):
        faces_seg = faces[i:i + segment_length]
        locations_seg = locations[i:i + segment_length]

        # ----------------------------------------------------------------------#
        """
        NumPy version (DEPRECATED)
        """

        # locations_track_start = [locations_track[i]]
        # forward_pts, feedback_pts = track_bidirectional(faces_seg, locations_track_start)
        #
        # forward_pts = np.rint(forward_pts).astype(int)
        # feedback_pts = np.rint(feedback_pts).astype(int)
        # merge_pt, check, P_predict = check_and_merge(locations_seg, forward_pts, feedback_pts, P_predict)

        # ----------------------------------------------------------------------#
        """
        OpenCV version
        """

        lk_params = dict(winSize=(15, 15),
                         maxLevel=3,
                         criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
        # Use the tracked current location as input, and the next frame's
        # detected location to initialize the optical flow.

        start_pt = locations_track[i].astype(np.float32)
        target_pt = locations_seg[1].astype(np.float32)

        forward_pt, status_fw, err_fw = cv2.calcOpticalFlowPyrLK(faces_seg[0], faces_seg[1],
                                                                 start_pt, target_pt, **lk_params,
                                                                 flags=cv2.OPTFLOW_USE_INITIAL_FLOW)
        feedback_pt, status_fb, err_fb = cv2.calcOpticalFlowPyrLK(faces_seg[1], faces_seg[0],
                                                                  forward_pt, start_pt, **lk_params,
                                                                  flags=cv2.OPTFLOW_USE_INITIAL_FLOW)

        forward_pts = [locations_track[i].copy(), forward_pt]
        feedback_pts = [feedback_pt, forward_pt.copy()]

        forward_pts = np.rint(forward_pts).astype(int)
        feedback_pts = np.rint(feedback_pts).astype(int)

        merge_pt, check, P_predict = check_and_merge(locations_seg, forward_pts, feedback_pts, P_predict,
                                                     status_fw, status_fb)

        # ----------------------------------------------------------------------#

        locations_track.append(merge_pt)

    """
    If using visualization, write the results to the visualization output folder.
    """
    if locations_sum != frames_num:
        print("INFO: Landmark detection failed in some frames, so visualization is "
              "disabled for this video. This will be optimized in a future version.")

    aligned_landmarks = []
    for i in locations_track:
        shape = landmark_align(i)
        shape = shape.ravel()
        shape = shape.tolist()
        aligned_landmarks.append(shape)

    return aligned_landmarks
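
A minimal usage sketch (assuming OpenCV for frame extraction, the dlib model file shape_predictor_68_face_landmarks.dat next to the module, and a placeholder path "example.mp4"; note that the fps and video arguments are accepted but unused by the current implementation):

import cv2
from landmark_utils import detect_frames_track

cap = cv2.VideoCapture("example.mp4")  # placeholder path
fps = cap.get(cv2.CAP_PROP_FPS)
frames = []
while True:
    ret, frame = cap.read()
    if not ret:
        break
    frames.append(frame)
cap.release()

aligned = detect_frames_track(frames, fps, "example.mp4")
print(len(aligned), "frames of aligned landmarks")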