import os

import cv2
import numpy as np
from PIL import Image
from batch_face import RetinaFace


def _get_square_face(coord, image, padding_scale=1.5):
    """Crop an (approximately) square face region from `image`.

    The detector box `coord` is enlarged by `padding_scale`, shifted
    slightly upwards (faces sit above the raw box center), squared, and
    clamped to the image borders.

    Args:
        coord: (x1, y1, x2, y2) face bounding box.
        image: BGR frame as an HxWxC numpy array.
        padding_scale: box enlargement factor; 1.0 means no padding.

    Returns:
        The cropped sub-image. Border clamping means the result can be
        smaller (and non-square) near the image edges.
    """
    x1, y1, x2, y2 = coord
    # Expand the face region by `padding_scale` times its mean side length.
    length = ((x2 - x1) + (y2 - y1)) // 2
    pad = length * (padding_scale - 1.0)
    x1 -= pad
    x2 += pad
    y1 -= pad
    y2 += pad
    # Move the center up a little.
    y1 -= pad * 0.2
    y2 -= pad * 0.2
    # Make the crop square around the (shifted) center.
    center = (x1 + x2) // 2, (y1 + y2) // 2
    half = max(x2 - x1, y2 - y1) // 2
    x1 = max(int(round(center[0] - half)), 0)
    x2 = min(int(round(center[0] + half)), image.shape[1])
    y1 = max(int(round(center[1] - half)), 0)
    y2 = min(int(round(center[1] + half)), image.shape[0])
    return image[y1:y2, x1:x2]


def _get_face_coord(face_detector, frame_cv2):
    """Return the bounding box of the first face detected in a BGR frame.

    Args:
        face_detector: a `batch_face.RetinaFace` instance (callable).
        frame_cv2: BGR frame as a numpy array.

    Returns:
        The first detection's bounding box (`faces[0][0]`).

    Raises:
        ValueError: if no face is detected.
    """
    faces = face_detector(frame_cv2, cv=True)
    if not faces:
        raise ValueError("Face is not detected")
    return faces[0][0]


def _smooth_coord(last_coord, current_coord, smooth_factor=0.1):
    """Exponentially smooth the box position to reduce frame-to-frame jitter.

    Moves only `smooth_factor` of the way from `last_coord` towards
    `current_coord`.

    Returns:
        The smoothed box as a list of ints.
    """
    change = (np.array(current_coord) - np.array(last_coord)) * smooth_factor
    return (np.array(last_coord) + change).astype(int).tolist()


def get_face_img(face_detector, input_frame_path):
    """Detect and crop the face from a single image file.

    Args:
        face_detector: a `batch_face.RetinaFace` instance.
        input_frame_path: path to the input image.

    Returns:
        (face, coord): the RGB face crop as a `PIL.Image` and the raw
        detector bounding box.

    Raises:
        ValueError: if the image cannot be read or no face is detected.
    """
    print("Detecting face in the image...")
    frame_cv2 = cv2.imread(input_frame_path)
    if frame_cv2 is None:
        # cv2.imread returns None (no exception) on a missing or unreadable
        # file; fail loudly here instead of crashing inside the detector.
        raise ValueError(f"Could not read image: {input_frame_path}")
    coord = _get_face_coord(face_detector, frame_cv2)
    face = _get_square_face(coord, frame_cv2)
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    return Image.fromarray(face), coord


def get_faces_video(face_detector, input_video_path):
    """Detect and crop the face from every frame of a video.

    Per-frame boxes are exponentially smoothed (see `_smooth_coord`) to
    reduce jitter.

    Args:
        face_detector: a `batch_face.RetinaFace` instance.
        input_video_path: path to the input video.

    Returns:
        (frames, coords): parallel lists of RGB `PIL.Image` crops and
        their (smoothed) bounding boxes, one entry per decoded frame.

    Raises:
        ValueError: if any decoded frame contains no detectable face.
    """
    output_frames = []
    output_coords = []
    last_coord = None
    print("Detecting faces in the video...")
    cap = cv2.VideoCapture(input_video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            face_coord = _get_face_coord(face_detector, frame)
            if last_coord is not None:
                face_coord = _smooth_coord(last_coord, face_coord)
            last_coord = face_coord
            face = _get_square_face(face_coord, frame)
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            output_frames.append(Image.fromarray(face))
            output_coords.append(face_coord)
    finally:
        # Release the capture even if detection raises mid-video.
        cap.release()
    return output_frames, output_coords


if __name__ == '__main__':
    import torch

    face_detector = RetinaFace(gpu_id=0) if torch.cuda.is_available() else RetinaFace(gpu_id=-1)

    # test for image
    input_frame_path = './test_imgs/makeup/1.jpg'
    face, _ = get_face_img(face_detector, input_frame_path)
    face.save('face.png')
    print("Image saved to face.png")

    # test for video
    import imageio
    from tqdm import tqdm

    frames, _ = get_faces_video(face_detector, './test_imgs/input_video.mp4')
    print("Number of frames: ", len(frames))
    writer = imageio.get_writer('face.mp4', fps=30, macro_block_size=1, quality=8, codec="libx264")
    for frame in tqdm(frames):
        writer.append_data(np.array(frame.resize((512, 512))))
    writer.close()
    print("Video saved to face.mp4")