File size: 7,514 Bytes
e34aada |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
"""This script contains the image preprocessing code for Deep3DFaceRecon_pytorch
"""
import numpy as np
from scipy.io import loadmat
from PIL import Image
import cv2
import os
# from skimage import transform as trans
import torch
import warnings
# NumPy 2.0 no longer exposes ``np.VisibleDeprecationWarning`` at the top
# level; the class lives in ``np.exceptions`` (present since NumPy 1.25).
# Look it up in whichever place exists so that importing this module works
# on both old and new NumPy releases.
_VisibleDeprecationWarning = getattr(
    getattr(np, "exceptions", np), "VisibleDeprecationWarning", None
)
if _VisibleDeprecationWarning is not None:
    warnings.filterwarnings("ignore", category=_VisibleDeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
# Solve a least-squares problem to obtain the translation and scale used for image alignment
def POS(xp, x):
    """Fit a per-axis linear map from 3D points to 2D points by least squares.

    For every point pair two equations are stacked into ``A k = b``:
    ``k[0:3] . x + k[3] = xp_x`` (even rows) and
    ``k[4:7] . x + k[7] = xp_y`` (odd rows).

    Parameters:
        xp  --numpy.array (2, N), 2D target points
        x   --numpy.array (3, N), corresponding 3D points
    Return:
        t   --numpy.array (2, 1), the fitted offsets (k[3], k[7])
        s   --float, mean of the norms of k[0:3] and k[4:7]
    """
    npts = xp.shape[1]

    A = np.zeros([2 * npts, 8])
    # even rows carry the x-equation, odd rows the y-equation
    A[0::2, 0:3] = x.transpose()
    A[0::2, 3] = 1
    A[1::2, 4:7] = x.transpose()
    A[1::2, 7] = 1
    b = np.reshape(xp.transpose(), [2 * npts, 1])

    # rcond=None selects NumPy's current default cutoff explicitly, which
    # avoids the FutureWarning emitted by NumPy < 2 and keeps the result
    # independent of the installed NumPy version.
    k, _, _, _ = np.linalg.lstsq(A, b, rcond=None)

    R1 = k[0:3]
    R2 = k[4:7]
    sTx = k[3]
    sTy = k[7]
    s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2
    t = np.stack([sTx, sTy], axis=0)
    return t, s
# bounding box for 68 landmark detection
def BBRegression(points, params):
    """Regress a square bounding box from five 2D landmarks.

    The landmarks are centred on their mean, divided by their RMS spread and
    pushed through a two-layer network whose weights come from ``params``.

    Parameters:
        points  --array-like with 10 values, read as five (x, y) pairs
        params  --mapping with the entries 'W1', 'B1', 'W2', 'B2'
    Return:
        rect    --numpy.array (4,), [x, y, w, w]
    """
    w1 = params['W1']
    b1 = params['B1']
    w2 = params['W2']
    b2 = params['B2']

    # Work on a float copy: with integer input the original in-place
    # centring stored the offsets back into an integer array and silently
    # truncated them (possibly down to an all-zero array and a zero rms).
    data = np.array(points, dtype=np.float64).reshape([5, 2])
    data_mean = np.mean(data, axis=0)
    x_mean = data_mean[0]
    y_mean = data_mean[1]
    data = data - data_mean

    # NOTE: rms is 0 when all five points coincide; the division then yields
    # NaN values, as it did before.
    rms = np.sqrt(np.sum(data ** 2) / 5)
    data = (data / rms).reshape([10, 1])

    hidden = np.matmul(w1, data) + b1
    # 2 / (1 + exp(-2z)) - 1 is the hyperbolic tangent of z
    hidden = 2 / (1 + np.exp(-2 * hidden)) - 1
    outputs = np.transpose(np.matmul(w2, hidden) + b2)

    # undo the normalisation applied to the inputs
    x = outputs[:, 0] * rms + x_mean
    y = outputs[:, 1] * rms + y_mean
    w = 224 / outputs[:, 2] * rms
    rects = [x, y, w, w]
    return np.array(rects).reshape([4])
# utils for landmark detection
def img_padding(img, box):
    """Place ``img`` in the middle of a zero canvas twice its size.

    Parameters:
        img  --numpy.array (H, W, 3)
        box  --sequence [x, y, ...] supporting ``.copy()``; only the first
               two entries are shifted
    Return:
        res      --numpy.array (2H, 2W, 3), zero canvas with img pasted at
                   offset (H // 2, W // 2)
        bbox     --copy of box with x and y moved into canvas coordinates
        success  --bool, False when the shifted x or y is negative
    """
    height, width = img.shape[0], img.shape[1]
    top, left = height // 2, width // 2

    canvas = np.zeros([2 * height, 2 * width, 3])
    canvas[top:height + top, left:width + left] = img

    # shift a copy so the caller's box is left untouched
    shifted = box.copy()
    shifted[0] = shifted[0] + left
    shifted[1] = shifted[1] + top

    inside = not (shifted[0] < 0 or shifted[1] < 0)
    return canvas, shifted, inside
# utils for landmark detection
def crop(img, bbox):
    """Cut ``bbox`` out of the zero-padded image and resize it to 224 x 224.

    Parameters:
        img   --numpy.array (H, W, 3)
        bbox  --sequence [x, y, w, h] of integers
    Return:
        crop_img  --numpy.array (224, 224, 3) uint8 patch; when padding
                    reports failure, the padded image is returned instead
        scale     --224 / h, or 0 when padding reports failure
    """
    padded_img, padded_bbox, ok = img_padding(img, bbox)
    if not ok:
        # scale 0 is the failure marker checked by callers
        return padded_img, 0

    left, top = padded_bbox[0], padded_bbox[1]
    box_w, box_h = padded_bbox[2], padded_bbox[3]

    patch = padded_img[top:top + box_h, left:left + box_w]
    patch = cv2.resize(patch.astype(np.uint8), (224, 224),
                       interpolation=cv2.INTER_CUBIC)
    return patch, 224 / box_h
# utils for landmark detection
def scale_trans(img, lm, t, s):
    """Translate and rescale ``img``, then take a centred 224 x 224 crop.

    Parameters:
        img  --numpy.array image, indexed as (rows, cols, ...)
        lm   --numpy.array (N, 2) landmarks
        t    --translation, indexed as t[0] (x) and t[1] (y)
        s    --scale; the image is resized by the factor 100 / s
    Return:
        cropped_img  --224 x 224 crop produced by ``crop``
        inv          --tuple (scale / scale2, scale * t1 + t2) for mapping
                       crop coordinates back to the raw image
    """
    width = img.shape[1]
    height = img.shape[0]

    # NOTE(review): the x and y offsets are built differently on purpose or
    # by accident (sign of t and the extra 0.5) — kept exactly as written.
    shift = np.array(
        [[1, 0, -t[0] + width//2 + 0.5], [0, 1, -height//2 + t[1]]],
        dtype=np.float32)
    img = cv2.warpAffine(img, shift, (width, height))

    new_w = int(width / s * 100)
    new_h = int(height / s * 100)
    img = cv2.resize(img, (new_w, new_h))

    # landmarks in the shifted / rescaled frame; not part of the return value
    lm = np.stack([lm[:, 0] - t[0] + width // 2,
                   lm[:, 1] - t[1] + height // 2], axis=1) / s * 100

    left = new_w//2 - 112
    up = new_h//2 - 112
    cropped_img, scale2 = crop(img, [left, up, 224, 224])
    assert(scale2 != 0)

    # back to raw img s * crop + s * t1 + t2
    t1 = np.array([left, up])
    scale = s / 100
    t2 = np.array([t[0] - width/2, t[1] - height / 2])
    inv = (scale/scale2, scale * t1 + t2.reshape([2]))
    return cropped_img, inv
# utils for landmark detection
def align_for_lm(img, five_points):
    """Crop the region regressed from five landmarks.

    The regressor weights are read from 'util/BBRegressorParam_r.mat'
    (relative to the working directory) on every call.

    Parameters:
        img          --numpy.array (H, W, 3)
        five_points  --array-like with 10 values (five 2D points)
    Return:
        crop_img  --crop returned by ``crop``
        scale     --scale returned by ``crop``
        bbox      --numpy.array (4,) int32, rounded bounding box
    """
    flat_points = np.array(five_points).reshape([1, 10])
    regressor = loadmat('util/BBRegressorParam_r.mat')

    raw_box = BBRegression(flat_points, regressor)
    assert(raw_box[2] != 0)

    bbox = np.round(raw_box).astype(np.int32)
    patch, patch_scale = crop(img, bbox)
    return patch, patch_scale, bbox
# resize and crop images for face reconstruction
def resize_n_crop_img(img, lm, t, s, target_size=224., mask=None):
    """Resize ``img`` by ``s`` and crop a ``target_size`` square around ``t``.

    Parameters:
        img          --PIL.Image
        lm           --numpy.array (N, 2) landmarks
        t            --translation, indexed as t[0] (x) and t[1] (y); each
                       entry may be a scalar or a length-1 array
        s            --numpy scalar scale factor (``.astype`` is called on
                       its products)
        target_size  --side length of the crop
        mask         --optional PIL.Image, resized and cropped like img
    Return:
        img   --PIL.Image, resized and cropped
        lm    --numpy.array (N, 2), landmarks shifted by t, scaled by s and
                moved by the centred crop offset
        mask  --PIL.Image or None
    """
    w0, h0 = img.size
    w = (w0*s).astype(np.int32)
    h = (h0*s).astype(np.int32)

    # t[0] / t[1] can be length-1 arrays (e.g. when t has shape (2, 1)).
    # Calling float() on an array with ndim > 0 is deprecated since
    # NumPy 1.25, so squeeze to a 0-d value first. Arrays with more than one
    # element still raise, exactly as before.
    shift_x = float(np.squeeze((t[0] - w0/2)*s))
    shift_y = float(np.squeeze((h0/2 - t[1])*s))

    left = (w/2 - target_size/2 + shift_x).astype(np.int32)
    right = left + target_size
    up = (h/2 - target_size/2 + shift_y).astype(np.int32)
    below = up + target_size

    img = img.resize((w, h), resample=Image.BICUBIC)
    img = img.crop((left, up, right, below))

    if mask is not None:
        mask = mask.resize((w, h), resample=Image.BICUBIC)
        mask = mask.crop((left, up, right, below))

    lm = np.stack([lm[:, 0] - t[0] + w0/2, lm[:, 1] -
                   t[1] + h0/2], axis=1)*s
    lm = lm - np.reshape(
        np.array([(w/2 - target_size/2), (h/2-target_size/2)]), [1, 2])

    return img, lm, mask
# utils for face reconstruction
def extract_5p(lm):
    """Reduce a landmark array to five points.

    Using 1-based landmark numbers, the rows of the result are:
    mean(37, 40), mean(43, 46), 31, 49, 55
    (presumably the two eye centres, nose tip and mouth corners of the usual
    68-point layout — confirm against the landmark detector in use).

    Parameters:
        lm    --numpy.array (68, 2)
    Return:
        lm5p  --numpy.array (5, 2), a new array
    """
    # zero-based row indices of landmarks 31, 37, 40, 43, 46, 49, 55
    p31, p37, p40, p43, p46, p49, p55 = 30, 36, 39, 42, 45, 48, 54

    first_pair_mean = np.mean(lm[[p37, p40], :], 0)
    second_pair_mean = np.mean(lm[[p43, p46], :], 0)

    return np.stack(
        [first_pair_mean, second_pair_mean, lm[p31, :], lm[p49, :], lm[p55, :]],
        axis=0)
# utils for face reconstruction
def align_img(img, lm, lm3D, mask=None, target_size=224., rescale_factor=102.):
    """Align an image (and optional mask) to the standard 3D landmarks.

    Return:
        transparams        --numpy.array (raw_W, raw_H, scale, tx, ty)
        img_new            --PIL.Image  (target_size, target_size, 3)
        lm_new             --numpy.array  (68, 2), y direction is opposite to v direction
        mask_new           --PIL.Image  (target_size, target_size)

    Parameters:
        img                --PIL.Image  (raw_H, raw_W, 3)
        lm                 --numpy.array  (68, 2), y direction is opposite to v direction;
                             an array with 5 rows is used as the five points directly
        lm3D               --numpy.array  (5, 3)
        mask               --PIL.Image  (raw_H, raw_W, 3)
        target_size        --side length of the output crop
        rescale_factor     --numerator of the final scale (rescale_factor / fitted scale)
    """
    raw_w, raw_h = img.size

    # five-point input is used as is; anything else is reduced to five points
    lm5p = lm if lm.shape[0] == 5 else extract_5p(lm)

    # calculate translation and scale factors using 5 facial landmarks and
    # standard landmarks of a 3D face
    t, fitted_scale = POS(lm5p.transpose(), lm3D.transpose())
    s = rescale_factor/fitted_scale

    # processing the image
    img_new, lm_new, mask_new = resize_n_crop_img(
        img, lm, t, s, target_size=target_size, mask=mask)

    tx, ty = t.reshape([2,])
    trans_params = np.array([raw_w, raw_h, s, tx, ty])
    return trans_params, img_new, lm_new, mask_new
# utils for face recognition model
def _similarity_transform(src_pts, dst_pts):
    """Least-squares similarity transform (Umeyama) mapping src_pts to dst_pts.

    Returns a (dim + 1, dim + 1) homogeneous matrix, or None when the input
    is non-finite or the points are degenerate (zero-rank covariance).
    """
    src_pts = np.asarray(src_pts, dtype=np.float64)
    dst_pts = np.asarray(dst_pts, dtype=np.float64)
    if not (np.all(np.isfinite(src_pts)) and np.all(np.isfinite(dst_pts))):
        return None

    num, dim = src_pts.shape
    src_mean = src_pts.mean(axis=0)
    dst_mean = dst_pts.mean(axis=0)
    src_demean = src_pts - src_mean
    dst_demean = dst_pts - dst_mean

    cov = dst_demean.T @ src_demean / num
    # d flips the last axis when needed so the result is a proper rotation
    d = np.ones((dim,), dtype=np.float64)
    if np.linalg.det(cov) < 0:
        d[dim - 1] = -1

    U, S, Vt = np.linalg.svd(cov)
    rank = np.linalg.matrix_rank(cov)
    if rank == 0:
        return None

    T = np.eye(dim + 1, dtype=np.float64)
    if rank == dim - 1:
        if np.linalg.det(U) * np.linalg.det(Vt) > 0:
            T[:dim, :dim] = U @ Vt
        else:
            saved = d[dim - 1]
            d[dim - 1] = -1
            T[:dim, :dim] = U @ np.diag(d) @ Vt
            d[dim - 1] = saved
    else:
        T[:dim, :dim] = U @ np.diag(d) @ Vt

    scale = (S @ d) / src_demean.var(axis=0).sum()
    T[:dim, dim] = dst_mean - scale * (T[:dim, :dim] @ src_mean)
    T[:dim, :dim] *= scale
    return T


def estimate_norm(lm_68p, H):
    # from https://github.com/deepinsight/insightface/blob/c61d3cd208a603dfa4a338bd743b320ce3e94730/recognition/common/face_align.py#L68
    """Estimate the similarity transform that maps the face onto the
    fixed five-point template used by the face recognition model.

    The fit is done with NumPy only: the module does not import
    ``skimage.transform`` (the import is commented out), so the previous
    ``trans.SimilarityTransform`` call raised NameError.

    Return:
        trans_m            --numpy.array  (2, 3)
    Parameters:
        lm                 --numpy.array  (68, 2), y direction is opposite to v direction;
                             an array with 5 rows is used as the five points directly
        H                  --int/float , image height
    """
    lm_68p = np.asarray(lm_68p)
    if lm_68p.shape[0] != 5:
        lm = extract_5p(lm_68p)
    else:
        lm = lm_68p
    # float copy, so flipping y below never writes into the caller's array
    lm = np.array(lm, dtype=np.float64)
    lm[:, -1] = H - 1 - lm[:, -1]

    template = np.array(
        [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
         [41.5493, 92.3655], [70.7299, 92.2041]],
        dtype=np.float32)

    M = _similarity_transform(lm, template)
    # degenerate landmarks give no usable transform: fall back to identity
    if M is None or not np.all(np.isfinite(M)) or np.linalg.det(M) == 0:
        M = np.eye(3)
    return M[0:2, :]
def estimate_norm_torch(lm_68p, H):
    """Run ``estimate_norm`` on every item of a batch of landmarks.

    Parameters:
        lm_68p  --torch.Tensor, iterated along its first dimension; each
                  item is passed to ``estimate_norm`` as a numpy array
        H       --int/float, image height forwarded to ``estimate_norm``
    Return:
        M       --torch.Tensor (float32) stacking the per-item matrices,
                  placed on the same device as ``lm_68p``
    """
    # the fit is done in numpy, so detach and copy to host memory first
    landmarks_np = lm_68p.detach().cpu().numpy()
    matrices = [estimate_norm(single, H) for single in landmarks_np]
    stacked = torch.tensor(np.array(matrices), dtype=torch.float32)
    return stacked.to(lm_68p.device)
|