rohitjakkam committed (verified)
Commit 34a4d54
1 Parent(s): 057b980

Upload 3 files

Files changed (3)
  1. networks/layers.py +49 -0
  2. requirements.txt +7 -0
  3. utils/utils.py +376 -0
networks/layers.py ADDED
@@ -0,0 +1,49 @@
+import tensorflow as tf
+from tensorflow.keras.layers import Layer, Dense
+
+
+def sin_activation(x, omega=30):
+    return tf.math.sin(omega * x)
+
+
+class AdaIN(Layer):
+    def __init__(self, **kwargs):
+        super(AdaIN, self).__init__(**kwargs)
+
+    def build(self, input_shapes):
+        x_shape = input_shapes[0]
+        w_shape = input_shapes[1]
+
+        self.w_channels = w_shape[-1]
+        self.x_channels = x_shape[-1]
+
+        self.dense_1 = Dense(self.x_channels)
+        self.dense_2 = Dense(self.x_channels)
+
+    def call(self, inputs):
+        x, w = inputs
+        ys = tf.reshape(self.dense_1(w), (-1, 1, 1, self.x_channels))
+        yb = tf.reshape(self.dense_2(w), (-1, 1, 1, self.x_channels))
+        return ys * x + yb
+
+    def get_config(self):
+        config = {
+            # 'w_channels': self.w_channels,
+            # 'x_channels': self.x_channels
+        }
+        base_config = super(AdaIN, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+
+class AdaptiveAttention(Layer):
+
+    def __init__(self, **kwargs):
+        super(AdaptiveAttention, self).__init__(**kwargs)
+
+    def call(self, inputs):
+        m, a, i = inputs
+        return (1 - m) * a + m * i
+
+    def get_config(self):
+        base_config = super(AdaptiveAttention, self).get_config()
+        return base_config
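The two layers above are small but load-bearing: AdaIN predicts a per-channel scale and bias from a style/identity vector w and applies them to a feature map x, and AdaptiveAttention blends two tensors a and i with a mask m as (1 - m) * a + m * i. A minimal sketch of how they can be wired together with the Keras functional API (the shapes and the model below are illustrative assumptions, not something defined in this upload):

from tensorflow.keras.layers import Input, Conv2D
from tensorflow.keras.models import Model
from networks.layers import AdaIN, AdaptiveAttention

x_in = Input(shape=(64, 64, 256))   # feature map to modulate (assumed shape)
w_in = Input(shape=(512,))          # style / identity vector (assumed shape)
m_in = Input(shape=(64, 64, 1))     # blending mask in [0, 1] (assumed shape)

styled = AdaIN()([x_in, w_in])                        # ys * x + yb, with ys/yb predicted from w
blended = AdaptiveAttention()([m_in, x_in, styled])   # (1 - m) * x + m * styled
out = Conv2D(3, 3, padding='same')(blended)

demo_model = Model([x_in, w_in, m_in], out)

Because both layers implement get_config, a model built this way can be round-tripped through to_json and model_from_json as long as the layers are passed as custom_objects, which is what load_model_internal in utils/utils.py does.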
requirements.txt ADDED
@@ -0,0 +1,7 @@
+tensorflow==2.10
+tensorflow-addons==0.17.1
+opencv-python-headless
+scipy
+pillow
+scikit-image
+huggingface_hub
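The pins above fix the TensorFlow stack exactly and leave the image-processing helpers unpinned. A quick sanity check that an environment actually matches the pinned versions before loading any weights (a small sketch; it only assumes the packages listed above are installed):

import tensorflow as tf
import tensorflow_addons as tfa

# Version strings follow the pins in requirements.txt above.
assert tf.__version__.startswith('2.10'), tf.__version__
assert tfa.__version__ == '0.17.1', tfa.__version__
print('TensorFlow', tf.__version__, '| Addons', tfa.__version__)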
utils/utils.py ADDED
@@ -0,0 +1,376 @@
+import json
+from tensorflow.keras.models import model_from_json
+from networks.layers import AdaIN, AdaptiveAttention
+import tensorflow as tf
+
+import numpy as np
+import cv2
+import math
+from skimage import transform as trans
+from scipy.signal import convolve2d
+from skimage.color import rgb2yuv, yuv2rgb
+
+from PIL import Image
+
+
+def save_model_internal(model, path, name, num):
+    json_model = model.to_json()
+    with open(path + name + '.json', "w") as json_file:
+        json_file.write(json_model)
+
+    model.save_weights(path + name + '_' + str(num) + '.h5')
+
+
+def load_model_internal(path, name, num):
+    with open(path + name + '.json', 'r') as json_file:
+        model_dict = json_file.read()
+
+    mod = model_from_json(model_dict, custom_objects={'AdaIN': AdaIN, 'AdaptiveAttention': AdaptiveAttention})
+    mod.load_weights(path + name + '_' + str(num) + '.h5')
+
+    return mod
+
+
+def save_training_meta(state_dict, path, num):
+    with open(path + str(num) + '.json', 'w') as json_file:
+        json.dump(state_dict, json_file, indent=2)
+
+
+def load_training_meta(path, num):
+    with open(path + str(num) + '.json', 'r') as json_file:
+        state_dict = json.load(json_file)
+    return state_dict
+
+
+def log_info(sw, results_dict, iteration):
+    with sw.as_default():
+        for key in results_dict.keys():
+            tf.summary.scalar(key, results_dict[key], step=iteration)
+
+
+src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007],
+                 [51.157, 89.050], [57.025, 89.702]],
+                dtype=np.float32)
+# <--left
+src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111],
+                 [45.177, 86.190], [64.246, 86.758]],
+                dtype=np.float32)
+
+# ---frontal
+src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493],
+                 [42.463, 87.010], [69.537, 87.010]],
+                dtype=np.float32)
+
+# -->right
+src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 68.111],
+                 [48.167, 86.758], [67.236, 86.190]],
+                dtype=np.float32)
+
+# -->right profile
+src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007],
+                 [55.388, 89.702], [61.257, 89.050]],
+                dtype=np.float32)
+
+src = np.array([src1, src2, src3, src4, src5])
+src_map = {112: src, 224: src * 2}
+
+# Left eye, right eye, nose, left mouth, right mouth
+arcface_src = np.array(
+    [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
+     [41.5493, 92.3655], [70.7299, 92.2041]],
+    dtype=np.float32)
+
+arcface_src = np.expand_dims(arcface_src, axis=0)
+
+
+def extract_face(img, bb, absolute_center, mode='arcface', extention_rate=0.05, debug=False):
+    """Extract face from image given a bounding box"""
+    # bbox
+    x1, y1, x2, y2 = bb + 60
+    adjusted_absolute_center = (absolute_center[0] + 60, absolute_center[1] + 60)
+    if debug:
+        print(bb + 60)
+        x1, y1, x2, y2 = bb
+        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 3)
+        cv2.circle(img, absolute_center, 1, (255, 0, 255), 2)
+        Image.fromarray(img).show()
+        x1, y1, x2, y2 = bb + 60
+    # Pad image in case face is out of frame
+    padded_img = np.zeros(shape=(248, 248, 3), dtype=np.uint8)
+    padded_img[60:-60, 60:-60, :] = img
+
+    if debug:
+        cv2.rectangle(padded_img, (x1, y1), (x2, y2), (0, 255, 255), 3)
+        cv2.circle(padded_img, adjusted_absolute_center, 1, (255, 255, 255), 2)
+        Image.fromarray(padded_img).show()
+
+    y_len = abs(y1 - y2)
+    x_len = abs(x1 - x2)
+
+    new_len = (y_len + x_len) // 2
+
+    extension = int(new_len * extention_rate)
+
+    x_adjust = (x_len - new_len) // 2
+    y_adjust = (y_len - new_len) // 2
+
+    x_1_adjusted = x1 + x_adjust - extension
+    x_2_adjusted = x2 - x_adjust + extension
+
+    if mode == 'arcface':
+        y_1_adjusted = y1 - extension
+        y_2_adjusted = y2 - 2 * y_adjust + extension
+    else:
+        y_1_adjusted = y1 + 2 * y_adjust - extension
+        y_2_adjusted = y2 + extension
+
+    move_x = adjusted_absolute_center[0] - (x_1_adjusted + x_2_adjusted) // 2
+    move_y = adjusted_absolute_center[1] - (y_1_adjusted + y_2_adjusted) // 2
+
+    x_1_adjusted = x_1_adjusted + move_x
+    x_2_adjusted = x_2_adjusted + move_x
+    y_1_adjusted = y_1_adjusted + move_y
+    y_2_adjusted = y_2_adjusted + move_y
+
+    # print(y_1_adjusted, y_2_adjusted, x_1_adjusted, x_2_adjusted)
+
+    return padded_img[y_1_adjusted:y_2_adjusted, x_1_adjusted:x_2_adjusted]
+
+
+def distance(a, b):
+    return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)
+
+
+def euclidean_distance(a, b):
+    x1 = a[0]; y1 = a[1]
+    x2 = b[0]; y2 = b[1]
+    return np.sqrt(((x2 - x1) * (x2 - x1)) + ((y2 - y1) * (y2 - y1)))
+
+
+def align_face(img, landmarks, debug=False):
+    nose, right_eye, left_eye = landmarks
+
+    left_eye_x = left_eye[0]
+    left_eye_y = left_eye[1]
+
+    right_eye_x = right_eye[0]
+    right_eye_y = right_eye[1]
+
+    center_eye = ((left_eye[0] + right_eye[0]) // 2, (left_eye[1] + right_eye[1]) // 2)
+
+    if left_eye_y < right_eye_y:
+        point_3rd = (right_eye_x, left_eye_y)
+        direction = -1
+    else:
+        point_3rd = (left_eye_x, right_eye_y)
+        direction = 1
+
+    if debug:
+        cv2.circle(img, point_3rd, 1, (255, 0, 0), 1)
+        cv2.circle(img, center_eye, 1, (255, 0, 0), 1)
+
+        cv2.line(img, right_eye, left_eye, (0, 0, 0), 1)
+        cv2.line(img, left_eye, point_3rd, (0, 0, 0), 1)
+        cv2.line(img, right_eye, point_3rd, (0, 0, 0), 1)
+
+    a = euclidean_distance(left_eye, point_3rd)
+    b = euclidean_distance(right_eye, left_eye)
+    c = euclidean_distance(right_eye, point_3rd)
+
+    cos_a = (b * b + c * c - a * a) / (2 * b * c)
+
+    angle = np.arccos(cos_a)
+
+    angle = (angle * 180) / np.pi
+
+    if direction == -1:
+        angle = 90 - angle
+        ang = math.radians(direction * angle)
+    else:
+        ang = math.radians(direction * angle)
+        angle = 0 - angle
+
+    M = cv2.getRotationMatrix2D((64, 64), angle, 1)
+    new_img = cv2.warpAffine(img, M, (128, 128),
+                             flags=cv2.INTER_CUBIC)
+
+    rotated_nose = (int((nose[0] - 64) * np.cos(ang) - (nose[1] - 64) * np.sin(ang) + 64),
+                    int((nose[0] - 64) * np.sin(ang) + (nose[1] - 64) * np.cos(ang) + 64))
+
+    rotated_center_eye = (int((center_eye[0] - 64) * np.cos(ang) - (center_eye[1] - 64) * np.sin(ang) + 64),
+                          int((center_eye[0] - 64) * np.sin(ang) + (center_eye[1] - 64) * np.cos(ang) + 64))
+
+    abolute_center = (rotated_center_eye[0], (rotated_nose[1] + rotated_center_eye[1]) // 2)
+
+    if debug:
+        cv2.circle(new_img, rotated_nose, 1, (0, 0, 255), 1)
+        cv2.circle(new_img, rotated_center_eye, 1, (0, 0, 255), 1)
+        cv2.circle(new_img, abolute_center, 1, (0, 0, 255), 1)
+
+    return new_img, abolute_center
+
+
+def estimate_norm(lmk, image_size=112, mode='arcface', shrink_factor=1.0):
+    assert lmk.shape == (5, 2)
+    tform = trans.SimilarityTransform()
+    lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1)
+    min_M = []
+    min_index = []
+    min_error = float('inf')
+    src_factor = image_size / 112
+    if mode == 'arcface':
+        src = arcface_src * shrink_factor + (1 - shrink_factor) * 56
+        src = src * src_factor
+    else:
+        src = src_map[image_size] * src_factor
+    for i in np.arange(src.shape[0]):
+        tform.estimate(lmk, src[i])
+        M = tform.params[0:2, :]
+        results = np.dot(M, lmk_tran.T)
+        results = results.T
+        error = np.sum(np.sqrt(np.sum((results - src[i]) ** 2, axis=1)))
+        # print(error)
+        if error < min_error:
+            min_error = error
+            min_M = M
+            min_index = i
+    return min_M, min_index
+
+
+def inverse_estimate_norm(lmk, t_lmk, image_size=112, mode='arcface', shrink_factor=1.0):
+    assert lmk.shape == (5, 2)
+    tform = trans.SimilarityTransform()
+    lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1)
+    min_M = []
+    min_index = []
+    min_error = float('inf')
+    src_factor = image_size / 112
+    if mode == 'arcface':
+        src = arcface_src * shrink_factor + (1 - shrink_factor) * 56
+        src = src * src_factor
+    else:
+        src = src_map[image_size] * src_factor
+    for i in np.arange(src.shape[0]):
+        tform.estimate(t_lmk, lmk)
+        M = tform.params[0:2, :]
+        results = np.dot(M, lmk_tran.T)
+        results = results.T
+        error = np.sum(np.sqrt(np.sum((results - src[i]) ** 2, axis=1)))
+        # print(error)
+        if error < min_error:
+            min_error = error
+            min_M = M
+            min_index = i
+    return min_M, min_index
+
+
+def norm_crop(img, landmark, image_size=112, mode='arcface', shrink_factor=1.0):
+    """
+    Align and crop the image based on the facial landmarks in the image. The alignment is done with
+    a similarity transformation based on source coordinates.
+    :param img: Image to transform.
+    :param landmark: Five landmark coordinates in the image.
+    :param image_size: Desired output size after transformation.
+    :param mode: 'arcface' aligns the face for use with the Arcface facial recognition model. Useful for
+    both facial recognition tasks and face swapping tasks.
+    :param shrink_factor: Shrink factor that shrinks the source landmark coordinates. This will include more border
+    information around the face. Useful when you want to include more background information when performing face swaps.
+    The lower the shrink factor, the more of the face is included. The default value 1.0 will align the image to be ready
+    for the Arcface recognition model, but usually omits part of the chin. A value of 0.0 would transform all source points
+    to the middle of the image, probably rendering the alignment procedure useless.
+    If you process the image with a shrink factor of 0.85 and then want to extract the identity embedding with Arcface,
+    you can simply take a central crop of factor 0.85 to yield the same crop as shrink factor 1.0 would have produced. This
+    reduces the resolution, so the recommendation is to process images to output resolutions higher than 112 when using
+    Arcface. This makes sure no information is lost by resampling the image after the central crop.
+    :return: Returns the transformed image.
+    """
+    M, pose_index = estimate_norm(landmark, image_size, mode, shrink_factor=shrink_factor)
+    warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
+    return warped
+
+
+def transform_landmark_points(M, points):
+    lmk_tran = np.insert(points, 2, values=np.ones(5), axis=1)
+    transformed_lmk = np.dot(M, lmk_tran.T)
+    transformed_lmk = transformed_lmk.T
+
+    return transformed_lmk
+
+
+def multi_convolver(image, kernel, iterations):
+    if kernel == "Sharpen":
+        kernel = np.array([[0, -1, 0],
+                           [-1, 5, -1],
+                           [0, -1, 0]])
+    elif kernel == "Unsharp_mask":
+        kernel = np.array([[1, 4, 6, 4, 1],
+                           [4, 16, 24, 16, 4],
+                           [6, 24, -476, 24, 6],
+                           [4, 16, 24, 16, 4],
+                           [1, 4, 6, 4, 1]]) * (-1 / 256)
+    elif kernel == "Blur":
+        kernel = (1 / 16.0) * np.array([[1., 2., 1.],
+                                        [2., 4., 2.],
+                                        [1., 2., 1.]])
+    for i in range(iterations):
+        image = convolve2d(image, kernel, 'same', boundary='fill', fillvalue=0)
+    return image
+
+
+def convolve_rgb(image, kernel, iterations=1):
+    img_yuv = rgb2yuv(image)
+    img_yuv[:, :, 0] = multi_convolver(img_yuv[:, :, 0], kernel,
+                                       iterations)
+    final_image = yuv2rgb(img_yuv)
+
+    return final_image.astype('float32')
+
+
+def generate_mask_from_landmarks(lms, im_size):
+    blend_mask_lm = np.zeros(shape=(im_size, im_size, 3), dtype='float32')
+
+    # EYES
+    blend_mask_lm = cv2.circle(blend_mask_lm,
+                               (int(lms[0][0]), int(lms[0][1])), 12, (255, 255, 255), 30)
+    blend_mask_lm = cv2.circle(blend_mask_lm,
+                               (int(lms[1][0]), int(lms[1][1])), 12, (255, 255, 255), 30)
+    blend_mask_lm = cv2.circle(blend_mask_lm,
+                               (int((lms[0][0] + lms[1][0]) / 2), int((lms[0][1] + lms[1][1]) / 2)),
+                               16, (255, 255, 255), 65)
+
+    # NOSE
+    blend_mask_lm = cv2.circle(blend_mask_lm,
+                               (int(lms[2][0]), int(lms[2][1])), 5, (255, 255, 255), 5)
+    blend_mask_lm = cv2.circle(blend_mask_lm,
+                               (int((lms[0][0] + lms[1][0]) / 2), int(lms[2][1])), 16, (255, 255, 255), 100)
+
+    # MOUTH
+    blend_mask_lm = cv2.circle(blend_mask_lm,
+                               (int(lms[3][0]), int(lms[3][1])), 6, (255, 255, 255), 30)
+    blend_mask_lm = cv2.circle(blend_mask_lm,
+                               (int(lms[4][0]), int(lms[4][1])), 6, (255, 255, 255), 30)
+
+    blend_mask_lm = cv2.circle(blend_mask_lm,
+                               (int((lms[3][0] + lms[4][0]) / 2), int((lms[3][1] + lms[4][1]) / 2)),
+                               16, (255, 255, 255), 40)
+    return blend_mask_lm
+
+
+def display_distance_text(im, distance, lms, im_w, im_h, scale=2):
+    blended_insert = cv2.putText(im, str(distance)[:4],
+                                 (int(lms[4] * im_w * 0.5), int(lms[5] * im_h * 0.8)),
+                                 cv2.FONT_HERSHEY_SIMPLEX, scale * 0.5, (0.08, 0.16, 0.08), int(scale * 2))
+    blended_insert = cv2.putText(blended_insert, str(distance)[:4],
+                                 (int(lms[4] * im_w * 0.5), int(lms[5] * im_h * 0.8)),
+                                 cv2.FONT_HERSHEY_SIMPLEX, scale * 0.5, (0.3, 0.7, 0.32), int(scale * 1))
+    return blended_insert
+
+
+def get_lm(annotation, im_w, im_h):
+    lm_align = np.array([[annotation[4] * im_w, annotation[5] * im_h],
+                         [annotation[6] * im_w, annotation[7] * im_h],
+                         [annotation[8] * im_w, annotation[9] * im_h],
+                         [annotation[10] * im_w, annotation[11] * im_h],
+                         [annotation[12] * im_w, annotation[13] * im_h]],
+                        dtype=np.float32)
+    return lm_align
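utils/utils.py above bundles the model (de)serialization helpers, the landmark templates and similarity-transform alignment (estimate_norm, norm_crop), light filtering, and landmark-mask generation. A rough end-to-end sketch of the alignment path, assuming a hypothetical run_detector that returns an annotation whose entries 4-13 are the five (x, y) landmark pairs normalized to [0, 1] (the layout get_lm indexes), and an illustrative input path:

import cv2
from utils.utils import get_lm, norm_crop, estimate_norm, transform_landmark_points, generate_mask_from_landmarks

img = cv2.cvtColor(cv2.imread('face.jpg'), cv2.COLOR_BGR2RGB)   # illustrative path
im_h, im_w = img.shape[:2]
annotation = run_detector(img)   # hypothetical detector, not part of this upload

lm = get_lm(annotation, im_w, im_h)          # (5, 2) landmark array in pixel coordinates
aligned = norm_crop(img, lm, image_size=256, mode='arcface', shrink_factor=0.85)

# The same similarity transform can carry the landmarks (and a blending mask
# built from them) into the aligned crop's coordinate frame.
M, _ = estimate_norm(lm, image_size=256, mode='arcface', shrink_factor=0.85)
lm_aligned = transform_landmark_points(M, lm)
mask = generate_mask_from_landmarks(lm_aligned, 256)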