Hatef Otroshi
Initial commit
9340724
raw
history blame
6.86 kB
from typing import Tuple
import numpy as np
import torch
from PIL import Image
from torch.autograd import Variable
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from mtcnn_pytorch.src.get_nets import PNet, RNet, ONet
from mtcnn_pytorch.src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from mtcnn_pytorch.src.first_stage import run_first_stage
from mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face
class MTCNN():
def __init__(self, device: str = 'cuda:0', crop_size: Tuple[int, int] = (112, 112)):
assert device in ['cuda:0', 'cpu']
self.device = torch.device(device)
assert crop_size in [(112, 112), (96, 112)]
self.crop_size = crop_size
# change working dir to this file location to load npz files. Then switch back
cwd = os.getcwd()
os.chdir(os.path.dirname(__file__))
self.pnet = PNet().to(self.device)
self.rnet = RNet().to(self.device)
self.onet = ONet().to(self.device)
self.pnet.eval()
self.rnet.eval()
self.onet.eval()
self.refrence = get_reference_facial_points(default_square=crop_size[0] == crop_size[1])
self.min_face_size = 20
self.thresholds = [0.6,0.7,0.9]
self.nms_thresholds = [0.7, 0.7, 0.7]
self.factor = 0.85
os.chdir(cwd)
def align(self, img):
_, landmarks = self.detect_faces(img, self.min_face_size, self.thresholds, self.nms_thresholds, self.factor)
facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)]
warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=self.crop_size)
return Image.fromarray(warped_face)
def align_multi(self, img, limit=None):
boxes, landmarks = self.detect_faces(img, self.min_face_size, self.thresholds, self.nms_thresholds, self.factor)
if limit:
boxes = boxes[:limit]
landmarks = landmarks[:limit]
faces = []
for landmark in landmarks:
facial5points = [[landmark[j], landmark[j + 5]] for j in range(5)]
warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=self.crop_size)
faces.append(Image.fromarray(warped_face))
return boxes, faces
def detect_faces(self, image, min_face_size, thresholds, nms_thresholds, factor):
"""
Arguments:
image: an instance of PIL.Image.
min_face_size: a float number.
thresholds: a list of length 3.
nms_thresholds: a list of length 3.
Returns:
two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
bounding boxes and facial landmarks.
"""
# BUILD AN IMAGE PYRAMID
width, height = image.size
min_length = min(height, width)
min_detection_size = 12
# factor = 0.707 # sqrt(0.5)
# scales for scaling the image
scales = []
# scales the image so that
# minimum size that we can detect equals to
# minimum face size that we want to detect
m = min_detection_size / min_face_size
min_length *= m
factor_count = 0
while min_length > min_detection_size:
scales.append(m * factor**factor_count)
min_length *= factor
factor_count += 1
# STAGE 1
# it will be returned
bounding_boxes = []
with torch.no_grad():
# run P-Net on different scales
for s in scales:
boxes = run_first_stage(image, self.pnet, scale=s, threshold=thresholds[0])
bounding_boxes.append(boxes)
# collect boxes (and offsets, and scores) from different scales
bounding_boxes = [i for i in bounding_boxes if i is not None]
if len(bounding_boxes) == 0:
return [], []
bounding_boxes = np.vstack(bounding_boxes)
keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
bounding_boxes = bounding_boxes[keep]
# use offsets predicted by pnet to transform bounding boxes
bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
# shape [n_boxes, 5]
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
# STAGE 2
img_boxes = get_image_boxes(bounding_boxes, image, size=24)
img_boxes = torch.FloatTensor(img_boxes).to(self.device)
output = self.rnet(img_boxes)
offsets = output[0].cpu().data.numpy() # shape [n_boxes, 4]
probs = output[1].cpu().data.numpy() # shape [n_boxes, 2]
keep = np.where(probs[:, 1] > thresholds[1])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
offsets = offsets[keep]
keep = nms(bounding_boxes, nms_thresholds[1])
bounding_boxes = bounding_boxes[keep]
bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
# STAGE 3
img_boxes = get_image_boxes(bounding_boxes, image, size=48)
if len(img_boxes) == 0:
return [], []
img_boxes = torch.FloatTensor(img_boxes).to(self.device)
output = self.onet(img_boxes)
landmarks = output[0].cpu().data.numpy() # shape [n_boxes, 10]
offsets = output[1].cpu().data.numpy() # shape [n_boxes, 4]
probs = output[2].cpu().data.numpy() # shape [n_boxes, 2]
keep = np.where(probs[:, 1] > thresholds[2])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
offsets = offsets[keep]
landmarks = landmarks[keep]
# compute landmark points
width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]
bounding_boxes = calibrate_box(bounding_boxes, offsets)
keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
bounding_boxes = bounding_boxes[keep]
landmarks = landmarks[keep]
return bounding_boxes, landmarks