File size: 7,806 Bytes
e34aada |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import cv2
import torch
from utils.commons.image_utils import dilate, erode
from sklearn.neighbors import NearestNeighbors
import copy
import numpy as np
from utils.commons.meters import Timer
def hold_eye_opened_for_secc(img):
    """
    Force the eyes of a SECC image to stay open by zeroing an "opened eye"
    template region.

    img: [3,h,w], tensor, -1~1. Pixels that are 0 (after conversion to
        0~255) in any of the first three channels are treated as non-face.
    return: [3,h,w], float32 CPU tensor, -1~1.

    The template mask is read from 'inference/os_avatar/opened_eye_mask.png'
    (relative to the current working directory) and resized to (h,w) with
    nearest-neighbour interpolation. Template pixels that lie too far from
    the eye holes already present in `img` are dropped before the remaining
    template pixels are set to 0 in the image.

    Raises FileNotFoundError if the template mask cannot be read.
    NOTE(review): if `img` has no hole pixel inside the eye prior region,
    the nearest-neighbour fit below receives zero samples and is expected to
    raise -- confirm the desired behaviour for fully closed eyes.
    """
    img = img.permute(1, 2, 0).cpu().numpy()
    # Round (not truncate) when quantising: float32 round-off maps e.g. pixel
    # value 1 to 0.99999994, which truncation would turn into 0 and thereby
    # misclassify as a hole. Clip first so out-of-range inputs cannot wrap
    # around in the unsigned cast.
    img = np.rint(np.clip((img + 1) / 2 * 255, 0, 255)).astype(np.uint8)
    face_mask = (img[..., 0] != 0) & (img[..., 1] != 0) & (img[..., 2] != 0)
    h, w = face_mask.shape

    # Coarse eye holes: non-face pixels inside two fixed prior boxes
    # (second quarter of the height, left / right of the vertical midline).
    left_eye_prior_region = np.zeros([h, w], dtype=bool)
    right_eye_prior_region = np.zeros([h, w], dtype=bool)
    left_eye_prior_region[h // 4:h // 2, w // 4:w // 2] = True
    right_eye_prior_region[h // 4:h // 2, w // 2:w // 4 * 3] = True
    eye_prior_region = left_eye_prior_region | right_eye_prior_region
    coarse_eye_mask = (~face_mask) & eye_prior_region
    coarse_eye_xys = np.argwhere(coarse_eye_mask)  # [N_eye, 2] (row, col)

    # Load the opened-eye template and bring it to the image resolution.
    mask_path = 'inference/os_avatar/opened_eye_mask.png'
    template = cv2.imread(mask_path)
    if template is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read opened eye mask: {mask_path}")
    template = torch.tensor(template).permute(2, 0, 1).unsqueeze(0)  # [1,3,H,W]
    template = torch.nn.functional.interpolate(template, size=(h, w), mode='nearest')[0]
    # Any non-zero channel marks a template pixel. Keep the mask as a NumPy
    # array so every indexing operation below stays within NumPy.
    opened_eye_mask = template.permute(1, 2, 0).sum(-1).bool().cpu().numpy()  # [h, w]
    coarse_opened_eye_xys = np.argwhere(opened_eye_mask)  # [N_open, 2] (row, col)

    # Distance from each template pixel to the nearest existing eye-hole pixel.
    nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(coarse_eye_xys)
    dists, _ = nbrs.kneighbors(coarse_opened_eye_xys)  # [N_open, 1]
    # Template pixels farther than this threshold are removed from the mask,
    # i.e. that part of the opened eye is left closed.
    non_opened_eye_pixs = (dists > max(dists.max() * 0.75, 4)).reshape([-1])
    opened_eye_xys_to_erode = coarse_opened_eye_xys[non_opened_eye_pixs]
    opened_eye_mask[opened_eye_xys_to_erode[..., 0], opened_eye_xys_to_erode[..., 1]] = False

    img[opened_eye_mask] = 0
    return torch.tensor(img.astype(np.float32) / 127.5 - 1).permute(2, 0, 1)
# def hold_eye_opened_for_secc(img):
# img = copy.copy(img)
# eye_mask = cv2.imread('inference/os_avatar/opened_eye_mask.png')
# eye_mask = torch.nn.functional.interpolate(torch.tensor(eye_mask).permute(2,0,1).unsqueeze(0), size=(img.shape[-2], img.shape[-1]), mode='nearest')[0].bool().to(img.device) # [3,512,512]
# img[eye_mask] = -1
# return img
def _secc_tensor_to_pixels(img):
    """Convert a [3,h,w] tensor in -1~1 into an [h,w,3] uint8 array in 0~255."""
    pixels = img.permute(1, 2, 0).cpu().numpy()
    # Round (not truncate): float32 round-off maps e.g. pixel value 1 to
    # 0.99999994, which truncation would turn into 0 (a fake "hole" pixel).
    # Clip first so out-of-range inputs cannot wrap in the unsigned cast.
    return np.rint(np.clip((pixels + 1) / 2 * 255, 0, 255)).astype(np.uint8)


def _pixels_to_secc_tensor(pixels):
    """Convert an [h,w,3] array in 0~255 back into a [3,h,w] float32 tensor in -1~1."""
    return torch.tensor(pixels.astype(np.float32) / 127.5 - 1).permute(2, 0, 1)


def blink_eye_for_secc(img, close_eye_percent=0.5):
    """
    Close the eyes of a SECC image by a given amount.

    img: [3,h,w], tensor, -1~1. Pixels that are 0 (after conversion to
        0~255) in any of the first three channels are treated as non-face;
        such pixels inside the eye prior boxes are taken to be the eye holes.
    close_eye_percent: float in [0, 1]; 0 returns the (re-quantised) input,
        1 fills the detected eye holes completely.
    return: [3,h,w], float32 CPU tensor, -1~1.

    Eye-hole pixels are filled, column by column from the upper and lower
    lids towards the column's mean row, with the colour of the nearest
    remaining face pixel. If no eye hole (or no face pixel around it) is
    found, the image is returned unchanged.
    """
    img = _secc_tensor_to_pixels(img)
    assert close_eye_percent <= 1.0 and close_eye_percent >= 0.
    if close_eye_percent == 0:
        return _pixels_to_secc_tensor(img)

    face_mask = (img[..., 0] != 0) & (img[..., 1] != 0) & (img[..., 2] != 0)
    h, w = face_mask.shape

    # Coarse eye holes: non-face pixels inside two fixed prior boxes.
    left_prior = np.zeros([h, w], dtype=bool)
    right_prior = np.zeros([h, w], dtype=bool)
    left_prior[h // 4:h // 2, w // 4:w // 2] = True
    right_prior[h // 4:h // 2, w // 2:w // 4 * 3] = True
    coarse_left_eye_mask = (~face_mask) & left_prior
    coarse_right_eye_mask = (~face_mask) & right_prior
    coarse_eye_mask = coarse_left_eye_mask | coarse_right_eye_mask
    coarse_eye_xys = np.argwhere(coarse_eye_mask)  # [N_eye, 2] (row, col)
    if coarse_eye_xys.shape[0] == 0:
        # No eye hole detected: nothing to close.
        return _pixels_to_secc_tensor(img)
    min_h = coarse_eye_xys[:, 0].min()
    max_h = coarse_eye_xys[:, 0].max()

    # Restrict the face pixels under consideration to a tight box around each
    # eye to keep the KNN queries cheap.
    more_room = 4  # margin around the eye box; too small a value causes problems
    eye_region = np.zeros([h, w], dtype=bool)
    for one_eye_mask in (coarse_left_eye_mask, coarse_right_eye_mask):
        eye_cols = np.nonzero(one_eye_mask.any(axis=0))[0]
        if eye_cols.size == 0:
            # This eye has no hole pixels; skip it instead of crashing.
            continue
        # Clamp the start indices at 0: a negative slice start would wrap around.
        eye_region[max(min_h - more_room, 0):max_h + more_room,
                   max(eye_cols[0] - more_room, 0):eye_cols[-1] + more_room] = True

    around_eye_face_mask = face_mask & eye_region
    face_xys = np.argwhere(around_eye_face_mask)  # [N_face, 2] (row, col)
    if face_xys.shape[0] == 0:
        # No face pixel near the eyes to borrow colours from.
        return _pixels_to_secc_tensor(img)

    # Shrink the face mask at the face-eye boundary for a smoother result:
    # only pixels whose nearest eye pixel is farther than 5 stay "face"
    # (too small a threshold causes problems).
    nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(coarse_eye_xys)
    dists, _ = nbrs.kneighbors(face_xys)  # [N_face, 1]
    near_eye = dists.reshape([-1]) <= 5
    around_eye_face_mask[face_xys[near_eye, 0], face_xys[near_eye, 1]] = False
    eye_mask = (~around_eye_face_mask) & eye_region

    # Per-column statistics of the eye mask: pixel count, mean / min / max row.
    row_idx = np.arange(h)[:, None]  # broadcasts against [h, w]
    eye_num_pixel_along_w_axis = eye_mask.sum(axis=0)
    eye_mean_h = (np.where(eye_mask, row_idx, 0).sum(axis=0)
                  / np.clip(eye_num_pixel_along_w_axis, a_min=1, a_max=h))
    eye_min_h = np.where(eye_mask, row_idx, 99999).min(axis=0)
    eye_max_h = np.where(eye_mask, row_idx, -99999).max(axis=0)

    # Per-column row limits: everything above `low` (upper lid) and below
    # `high` (lower lid) is closed; both move towards the mean row as
    # close_eye_percent grows.
    eye_low_h = close_eye_percent * eye_mean_h + (1 - close_eye_percent) * eye_min_h
    eye_high_h = close_eye_percent * eye_mean_h + (1 - close_eye_percent) * eye_max_h
    # Intersect with eye_mask explicitly so that pixels outside the eye can
    # never be selected, whatever the sentinel-based limits evaluate to.
    upper_eye_blink_mask = eye_mask & (row_idx <= eye_low_h)
    lower_eye_blink_mask = eye_mask & (row_idx >= eye_high_h)
    eye_blink_mask = upper_eye_blink_mask | lower_eye_blink_mask

    # Fill every pixel to be closed with the colour of its nearest face pixel
    # (taken from the shrunken face mask).
    face_xys = np.argwhere(around_eye_face_mask)
    eye_blink_xys = np.argwhere(eye_blink_mask)
    if face_xys.shape[0] == 0 or eye_blink_xys.shape[0] == 0:
        return _pixels_to_secc_tensor(img)
    nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(face_xys)
    _, indices = nbrs.kneighbors(eye_blink_xys)
    bg_fg_xys = face_xys[indices[:, 0]]
    img[eye_blink_xys[:, 0], eye_blink_xys[:, 1], :] = img[bg_fg_xys[:, 0], bg_fg_xys[:, 1], :]
    return _pixels_to_secc_tensor(img)
if __name__ == '__main__':
    # Demo: render a blink animation (eyes closing from 0% to 96%) to a video.
    import imageio
    import tqdm

    demo_path = "assets/cano_secc.png"
    img = cv2.imread(demo_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read demo image: {demo_path}")
    img = img / 127.5 - 1
    img = torch.FloatTensor(img).permute(2, 0, 1)  # [3,h,w], -1~1

    fps = 25
    writer = imageio.get_writer('demo_blink.mp4', fps=fps)
    try:
        for i in tqdm.trange(33):
            blink_percent = 0.03 * i
            with Timer("Blink", True):
                out_img = blink_eye_for_secc(img, blink_percent)
            # Hand imageio rounded uint8 frames; other integer dtypes would go
            # through imageio's lossy conversion to 8 bit.
            frame = ((out_img.permute(1, 2, 0) + 1) * 127.5).round().clamp(0, 255).to(torch.uint8).numpy()
            # cv2.imread yields BGR while imageio expects RGB: flip the channels.
            writer.append_data(np.ascontiguousarray(frame[..., ::-1]))
    finally:
        # Close the writer even if a frame fails so the file is finalised.
        writer.close()