adt2 / adetailer /mediapipe.py
ddoc's picture
Upload 36 files
187d856
from __future__ import annotations
from functools import partial
import mediapipe as mp
import numpy as np
from PIL import Image, ImageDraw
from adetailer import PredictOutput
from adetailer.common import create_bbox_from_mask, create_mask_from_bbox
def mediapipe_predict(
    model_type: str, image: Image.Image, confidence: float = 0.3
) -> PredictOutput:
    """
    Run the MediaPipe predictor selected by ``model_type`` on ``image``.

    Parameters
    ----------
        model_type: One of "mediapipe_face_short", "mediapipe_face_full",
            "mediapipe_face_mesh" or "mediapipe_face_mesh_eyes_only".
        image: The image handed to the selected predictor.
        confidence: Detection confidence threshold forwarded to the predictor.

    Returns
    -------
        PredictOutput: Whatever the selected predictor returns.

    Raises
    ------
        RuntimeError: If ``model_type`` is not one of the known names.
    """
    # The two face-detection variants share one function and differ only in
    # the MediaPipe model index bound as its first argument.
    predictors = {
        "mediapipe_face_short": partial(mediapipe_face_detection, 0),
        "mediapipe_face_full": partial(mediapipe_face_detection, 1),
        "mediapipe_face_mesh": mediapipe_face_mesh,
        "mediapipe_face_mesh_eyes_only": mediapipe_face_mesh_eyes_only,
    }

    predictor = predictors.get(model_type)
    if predictor is None:
        msg = f"[-] ADetailer: Invalid mediapipe model type: {model_type}, Available: {list(predictors.keys())!r}"
        raise RuntimeError(msg)

    return predictor(image, confidence)
def mediapipe_face_detection(
    model_type: int, image: Image.Image, confidence: float = 0.3
) -> PredictOutput:
    """
    Detect faces with MediaPipe Face Detection and build rectangular masks.

    Parameters
    ----------
        model_type: Forwarded to MediaPipe as ``model_selection``
            (``mediapipe_predict`` binds 0 for "short" and 1 for "full").
        image: The image to search. Images that are not in "RGB" mode are
            converted to RGB before detection.
        confidence: Forwarded to MediaPipe as ``min_detection_confidence``.

    Returns
    -------
        PredictOutput: Empty when MediaPipe reports no detections. Otherwise
            holds one ``[x1, y1, x2, y2]`` pixel box per detection, the masks
            built from those boxes by ``create_mask_from_bbox``, and a preview
            image with the detections drawn on it.
    """
    img_width, img_height = image.size

    mp_face_detection = mp.solutions.face_detection
    draw_util = mp.solutions.drawing_utils

    # MediaPipe's process() only accepts three-channel RGB arrays, so any
    # other mode (RGBA, L, P, ...) is normalized first. RGB input is used
    # as-is, which keeps the behavior for RGB images unchanged.
    rgb_image = image if image.mode == "RGB" else image.convert("RGB")
    img_array = np.array(rgb_image)

    with mp_face_detection.FaceDetection(
        model_selection=model_type, min_detection_confidence=confidence
    ) as face_detector:
        pred = face_detector.process(img_array)

    if pred.detections is None:
        return PredictOutput()

    # Draw on a copy so the array that was analyzed stays untouched.
    preview_array = img_array.copy()

    bboxes = []
    for detection in pred.detections:
        draw_util.draw_detection(preview_array, detection)

        # The relative box is scaled by the image size to get pixel units.
        bbox = detection.location_data.relative_bounding_box
        x1 = bbox.xmin * img_width
        y1 = bbox.ymin * img_height
        x2 = x1 + bbox.width * img_width
        y2 = y1 + bbox.height * img_height
        bboxes.append([x1, y1, x2, y2])

    masks = create_mask_from_bbox(bboxes, image.size)
    preview = Image.fromarray(preview_array)
    return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)
def get_convexhull(points: np.ndarray) -> list[tuple[int, int]]:
    """
    Return the convex hull of ``points`` as a polygon outline.

    Parameters
    ----------
        points: An ndarray of shape (n, 2) containing the 2D points.

    Returns
    -------
        list[tuple[int, int]]: Input for the draw.polygon function. The
            coordinates are taken from ``points`` without conversion, so
            they keep the dtype of the input array.
    """
    # Imported locally, as in the original code, so scipy is only needed
    # when a hull is actually computed.
    from scipy.spatial import ConvexHull

    hull_indices = ConvexHull(points).vertices
    # Keep only the first two columns (x, y) of the hull rows.
    outline = points[hull_indices][:, :2]
    return [(x, y) for x, y in outline]
def mediapipe_face_mesh(image: Image.Image, confidence: float = 0.3) -> PredictOutput:
    """
    Detect faces with MediaPipe Face Mesh and build one hull mask per face.

    Parameters
    ----------
        image: The image to search. Images that are not in "RGB" mode are
            converted to RGB before detection.
        confidence: Forwarded to MediaPipe as ``min_detection_confidence``.

    Returns
    -------
        PredictOutput: Empty when MediaPipe reports no face landmarks.
            Otherwise holds one "L" mode mask per face (the convex hull of
            its landmarks filled white on black), the boxes that
            ``create_bbox_from_mask`` derives from those masks, and a preview
            image with the mesh tesselation drawn on it.
    """
    mp_face_mesh = mp.solutions.face_mesh
    draw_util = mp.solutions.drawing_utils
    drawing_styles = mp.solutions.drawing_styles

    w, h = image.size

    # MediaPipe's process() only accepts three-channel RGB arrays, so any
    # other mode (RGBA, L, P, ...) is normalized first. RGB input is used
    # as-is, which keeps the behavior for RGB images unchanged.
    rgb_image = image if image.mode == "RGB" else image.convert("RGB")
    arr = np.array(rgb_image)

    with mp_face_mesh.FaceMesh(
        static_image_mode=True, max_num_faces=20, min_detection_confidence=confidence
    ) as face_mesh:
        pred = face_mesh.process(arr)

    if pred.multi_face_landmarks is None:
        return PredictOutput()

    # Draw on a copy so the array that was analyzed stays untouched.
    preview = arr.copy()
    masks = []

    for landmarks in pred.multi_face_landmarks:
        draw_util.draw_landmarks(
            image=preview,
            landmark_list=landmarks,
            connections=mp_face_mesh.FACEMESH_TESSELATION,
            landmark_drawing_spec=None,
            connection_drawing_spec=drawing_styles.get_default_face_mesh_tesselation_style(),
        )

        # Landmark coordinates are scaled by the image size to get pixels.
        points = np.array([(land.x * w, land.y * h) for land in landmarks.landmark])
        outline = get_convexhull(points)

        mask = Image.new("L", image.size, "black")
        draw = ImageDraw.Draw(mask)
        draw.polygon(outline, fill="white")
        masks.append(mask)

    bboxes = create_bbox_from_mask(masks, image.size)
    preview = Image.fromarray(preview)
    return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)
def mediapipe_face_mesh_eyes_only(
    image: Image.Image, confidence: float = 0.3
) -> PredictOutput:
    """
    Detect faces with MediaPipe Face Mesh and mask only the eye regions.

    Parameters
    ----------
        image: The image to search. Images that are not in "RGB" mode are
            converted to RGB before detection.
        confidence: Forwarded to MediaPipe as ``min_detection_confidence``.

    Returns
    -------
        PredictOutput: Empty when MediaPipe reports no face landmarks.
            Otherwise holds one "L" mode mask per face (the convex hulls of
            the left-eye and right-eye landmarks filled white on black), the
            boxes that ``create_bbox_from_mask`` derives from those masks,
            and a preview produced by ``draw_preview``.
    """
    mp_face_mesh = mp.solutions.face_mesh

    # The eye constants are collections of landmark-index pairs; flattening
    # them gives the indices of all landmarks that belong to each eye.
    left_idx = np.array(list(mp_face_mesh.FACEMESH_LEFT_EYE)).flatten()
    right_idx = np.array(list(mp_face_mesh.FACEMESH_RIGHT_EYE)).flatten()

    w, h = image.size

    # MediaPipe's process() only accepts three-channel RGB arrays, so any
    # other mode (RGBA, L, P, ...) is normalized first. RGB input is used
    # as-is, which keeps the behavior for RGB images unchanged.
    rgb_image = image if image.mode == "RGB" else image.convert("RGB")
    arr = np.array(rgb_image)

    with mp_face_mesh.FaceMesh(
        static_image_mode=True, max_num_faces=20, min_detection_confidence=confidence
    ) as face_mesh:
        pred = face_mesh.process(arr)

    if pred.multi_face_landmarks is None:
        return PredictOutput()

    # The preview is built from the same RGB image that was analyzed; the
    # copy keeps the caller's image from being modified.
    preview = rgb_image.copy()
    masks = []

    for landmarks in pred.multi_face_landmarks:
        # Landmark coordinates are scaled by the image size to get pixels.
        points = np.array([(land.x * w, land.y * h) for land in landmarks.landmark])
        left_outline = get_convexhull(points[left_idx])
        right_outline = get_convexhull(points[right_idx])

        # Both eyes of one face are drawn into a single mask.
        mask = Image.new("L", image.size, "black")
        draw = ImageDraw.Draw(mask)
        for outline in (left_outline, right_outline):
            draw.polygon(outline, fill="white")
        masks.append(mask)

    bboxes = create_bbox_from_mask(masks, image.size)
    preview = draw_preview(preview, bboxes, masks)
    return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)
def draw_preview(
    preview: Image.Image, bboxes: list[list[int]], masks: list[Image.Image]
) -> Image.Image:
    """
    Tint the masked regions red and outline the bounding boxes.

    Parameters
    ----------
        preview: The image to annotate.
        bboxes: Boxes passed one by one to ``ImageDraw.rectangle``.
        masks: Masks selecting the regions to tint.

    Returns
    -------
        Image.Image: The annotated image. When ``masks`` is empty this is
            the ``preview`` object itself, with the rectangles drawn onto it
            in place.
    """
    red_layer = Image.new("RGB", preview.size, "red")

    canvas = preview
    for region in masks:
        # Paint the region solid red, then mix that back in at 25% so the
        # image underneath stays visible.
        painted = Image.composite(red_layer, canvas, region)
        canvas = Image.blend(canvas, painted, 0.25)

    pen = ImageDraw.Draw(canvas)
    for box in bboxes:
        pen.rectangle(box, outline="red", width=2)

    return canvas