# Intelligent-Medical-Guidance-Large-Model/server/digital_human/modules/musetalk/utils/blending.py
import cv2
import numpy as np
from PIL import Image

from face_parsing import FaceParsing
def init_face_parsing_model(
    resnet_path="./models/face-parse-bisent/resnet18-5c106cde.pth", face_model_pth="./models/face-parse-bisent/79999_iter.pth"
):
    """Construct and return a FaceParsing model from the given checkpoint paths.

    Args:
        resnet_path: path to the ResNet-18 backbone weights.
        face_model_pth: path to the BiSeNet face-parsing weights.
    """
    return FaceParsing(resnet_path, face_model_pth)
def get_crop_box(box, expand):
    """Return a square crop box centered on `box`, enlarged by `expand`.

    Args:
        box: (x, y, x1, y1) face bounding box.
        expand: scale factor applied to half the longer side.

    Returns:
        ([left, top, right, bottom], half_side) — the square crop box
        and its half side length in pixels.
    """
    left, top, right, bottom = box
    center_x = (left + right) // 2
    center_y = (top + bottom) // 2
    # Half side: half of the longer box dimension, scaled by `expand`.
    half_side = int(max(right - left, bottom - top) // 2 * expand)
    square = [center_x - half_side, center_y - half_side,
              center_x + half_side, center_y + half_side]
    return square, half_side
def face_seg(image, fp_model):
    """Run face parsing on `image` and return the mask resized to `image.size`.

    Args:
        image: PIL image to segment.
        fp_model: callable face-parsing model; returns a PIL mask or None.

    Returns:
        The segmentation mask resized to match `image`, or None when the
        parser detects no person (an error message is printed).
    """
    parsed = fp_model(image)
    if parsed is None:
        print("error, no person_segment")
        return None
    return parsed.resize(image.size)
def get_image(image, face, face_box, fp_model, upper_boundary_ratio=0.5, expand=1.2):
    """Blend a generated `face` patch back into the full frame `image`.

    Args:
        image: full frame as a BGR uint8 array (H, W, 3).
        face: generated face crop as a BGR uint8 array covering `face_box`.
        face_box: (x, y, x1, y1) face location inside `image`.
        fp_model: face-parsing callable used to build the blend mask.
        upper_boundary_ratio: fraction of the crop height (from the top)
            whose mask is zeroed, so the upper face is kept from the
            original frame and only the lower (talking) area is replaced.
        expand: enlargement factor for the square crop around the face.

    Returns:
        The blended frame as a BGR uint8 array, same shape as `image`.
    """
    # PIL works in RGB; the arrays arrive as BGR (OpenCV convention).
    body = Image.fromarray(image[:, :, ::-1])
    face = Image.fromarray(face[:, :, ::-1])

    x, y, x1, y1 = face_box
    crop_box, _ = get_crop_box(face_box, expand)
    x_s, y_s, _, _ = crop_box  # only the crop's top-left offset is needed below
    face_large = body.crop(crop_box)
    ori_shape = face_large.size

    # Segment the enlarged crop, then keep only the mask inside face_box.
    # NOTE(review): face_seg may return None; that would raise here —
    # upstream presumably guarantees a detected face (TODO confirm).
    mask_image = face_seg(face_large, fp_model)
    mask_small = mask_image.crop((x - x_s, y - y_s, x1 - x_s, y1 - y_s))
    mask_image = Image.new("L", ori_shape, 0)
    mask_image.paste(mask_small, (x - x_s, y - y_s, x1 - x_s, y1 - y_s))

    # keep upper_boundary_ratio of talking area: zero the mask above the boundary.
    width, height = mask_image.size
    top_boundary = int(height * upper_boundary_ratio)
    modified_mask_image = Image.new("L", ori_shape, 0)
    modified_mask_image.paste(mask_image.crop((0, top_boundary, width, height)), (0, top_boundary))

    # Feather the mask edge; `// 2 * 2 + 1` forces an odd kernel size,
    # which cv2.GaussianBlur requires.
    blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
    mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
    mask_image = Image.fromarray(mask_array)

    # Paste the generated face into the crop, then composite the crop back
    # into the frame through the feathered mask.
    face_large.paste(face, (x - x_s, y - y_s, x1 - x_s, y1 - y_s))
    body.paste(face_large, crop_box[:2], mask_image)
    return np.array(body)[:, :, ::-1]  # back to BGR
def get_image_prepare_material(image, face_box, fp_model, upper_boundary_ratio=0.5, expand=1.2):
    """Precompute the feathered blend mask and crop box for one frame.

    Same mask construction as `get_image`, but without pasting a face —
    the results can be cached and reused per frame by `get_image_blending`.

    Args:
        image: full frame as a BGR uint8 array (H, W, 3).
        face_box: (x, y, x1, y1) face location inside `image`.
        fp_model: face-parsing callable used to build the blend mask.
        upper_boundary_ratio: fraction of the crop height (from the top)
            whose mask is zeroed so the upper face stays untouched.
        expand: enlargement factor for the square crop around the face.

    Returns:
        (mask_array, crop_box): the blurred uint8 mask for the crop region,
        and the [left, top, right, bottom] crop box in frame coordinates.
    """
    body = Image.fromarray(image[:, :, ::-1])  # BGR -> RGB for PIL

    x, y, x1, y1 = face_box
    crop_box, _ = get_crop_box(face_box, expand)
    x_s, y_s, _, _ = crop_box  # only the crop's top-left offset is needed below
    face_large = body.crop(crop_box)
    ori_shape = face_large.size

    # Segment the enlarged crop, then keep only the mask inside face_box.
    # NOTE(review): face_seg may return None; that would raise here —
    # upstream presumably guarantees a detected face (TODO confirm).
    mask_image = face_seg(face_large, fp_model)
    mask_small = mask_image.crop((x - x_s, y - y_s, x1 - x_s, y1 - y_s))
    mask_image = Image.new("L", ori_shape, 0)
    mask_image.paste(mask_small, (x - x_s, y - y_s, x1 - x_s, y1 - y_s))

    # keep upper_boundary_ratio of talking area: zero the mask above the boundary.
    width, height = mask_image.size
    top_boundary = int(height * upper_boundary_ratio)
    modified_mask_image = Image.new("L", ori_shape, 0)
    modified_mask_image.paste(mask_image.crop((0, top_boundary, width, height)), (0, top_boundary))

    # Feather the mask edge; `// 2 * 2 + 1` forces the odd kernel size
    # that cv2.GaussianBlur requires.
    blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
    mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
    return mask_array, crop_box
def get_image_blending(image, face, face_box, mask_array, crop_box):
    """Composite `face` into `image` using a precomputed mask and crop box.

    Args:
        image: full frame as a BGR uint8 array (H, W, 3).
        face: generated face crop as a BGR uint8 array covering `face_box`.
        face_box: (x, y, x1, y1) face location inside `image`.
        mask_array: feathered uint8 mask from `get_image_prepare_material`.
        crop_box: [left, top, right, bottom] crop from the same call.

    Returns:
        The blended frame as a BGR uint8 array, same shape as `image`.
    """
    # PIL expects RGB; the incoming arrays are BGR.
    frame = Image.fromarray(image[:, :, ::-1])
    face_patch = Image.fromarray(face[:, :, ::-1])

    x, y, x1, y1 = face_box
    x_s, y_s, x_e, y_e = crop_box
    region = frame.crop(crop_box)

    blend_mask = Image.fromarray(mask_array).convert("L")

    # Paste the face into the crop, then composite the crop back into the
    # frame through the feathered mask.
    region.paste(face_patch, (x - x_s, y - y_s, x1 - x_s, y1 - y_s))
    frame.paste(region, crop_box[:2], blend_mask)
    return np.array(frame)[:, :, ::-1]  # back to BGR