# File size: 5,759 Bytes
# 187d856
from __future__ import annotations
from functools import partial
import mediapipe as mp
import numpy as np
from PIL import Image, ImageDraw
from adetailer import PredictOutput
from adetailer.common import create_bbox_from_mask, create_mask_from_bbox
def mediapipe_predict(
    model_type: str, image: Image.Image, confidence: float = 0.3
) -> PredictOutput:
    """
    Run the mediapipe detector selected by ``model_type`` on ``image``.

    Parameters
    ----------
    model_type: Name of the detector; must be one of the keys of the
        dispatch table built below.
    image: Picture handed to the selected detector.
    confidence: Threshold forwarded unchanged to the selected detector.

    Returns
    -------
    PredictOutput: Whatever the selected detector returns.

    Raises
    ------
    RuntimeError: If ``model_type`` is not a key of the dispatch table.
    """
    # Name -> callable taking (image, confidence). The two face-detection
    # variants pre-bind the integer model selector as the first argument.
    mapping = {
        "mediapipe_face_short": partial(mediapipe_face_detection, 0),
        "mediapipe_face_full": partial(mediapipe_face_detection, 1),
        "mediapipe_face_mesh": mediapipe_face_mesh,
        "mediapipe_face_mesh_eyes_only": mediapipe_face_mesh_eyes_only,
    }

    detector = mapping.get(model_type)
    if detector is None:
        msg = f"[-] ADetailer: Invalid mediapipe model type: {model_type}, Available: {list(mapping.keys())!r}"
        raise RuntimeError(msg)

    return detector(image, confidence)
def mediapipe_face_detection(
    model_type: int, image: Image.Image, confidence: float = 0.3
) -> PredictOutput:
    """
    Detect faces with mediapipe's ``FaceDetection`` solution.

    Parameters
    ----------
    model_type: Passed to ``FaceDetection`` as ``model_selection``
        (the dispatcher in this file binds 0 for "short" and 1 for "full").
    image: Input picture. It is converted to RGB before detection, so
        images in other modes (e.g. RGBA, L, P) are accepted as well.
    confidence: Passed to ``FaceDetection`` as ``min_detection_confidence``.

    Returns
    -------
    PredictOutput: An empty ``PredictOutput()`` when nothing is detected.
        Otherwise ``bboxes`` holds one ``[x1, y1, x2, y2]`` list per
        detection in pixel units (relative box scaled by the image size),
        ``masks`` comes from ``create_mask_from_bbox`` and ``preview`` is
        the RGB picture with the detections drawn onto it.
    """
    img_width, img_height = image.size

    mp_face_detection = mp.solutions.face_detection
    draw_util = mp.solutions.drawing_utils

    # mediapipe only accepts three-channel RGB arrays; feeding it an RGBA,
    # grayscale or palette image raises inside ``process``. Normalising the
    # mode here is a no-op (plain copy) for images that are already RGB.
    img_array = np.array(image.convert("RGB"))

    with mp_face_detection.FaceDetection(
        model_selection=model_type, min_detection_confidence=confidence
    ) as face_detector:
        pred = face_detector.process(img_array)

    if pred.detections is None:
        return PredictOutput()

    # Draw on a copy so the array that was fed to the detector stays intact.
    preview_array = img_array.copy()

    bboxes = []
    for detection in pred.detections:
        draw_util.draw_detection(preview_array, detection)

        # The relative box is scaled by the image size to get pixel units.
        bbox = detection.location_data.relative_bounding_box
        x1 = bbox.xmin * img_width
        y1 = bbox.ymin * img_height
        x2 = x1 + bbox.width * img_width
        y2 = y1 + bbox.height * img_height

        bboxes.append([x1, y1, x2, y2])

    masks = create_mask_from_bbox(bboxes, image.size)
    preview = Image.fromarray(preview_array)

    return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)
def get_convexhull(points: np.ndarray) -> list[tuple[int, int]]:
    """
    Compute the convex hull of a 2D point cloud.

    Parameters
    ----------
    points: An ndarray of shape (n, 2) containing the 2D points.

    Returns
    -------
    list[tuple[int, int]]: The hull's corner points as (x, y) pairs, in the
        order given by ``ConvexHull.vertices``; usable as input for the
        draw.polygon function. The coordinates are taken from ``points``
        unchanged.
    """
    # Imported lazily so scipy is only loaded when a hull is actually needed.
    from scipy.spatial import ConvexHull

    corner_points = points[ConvexHull(points).vertices]
    return [(x, y) for x, y in corner_points]
def mediapipe_face_mesh(image: Image.Image, confidence: float = 0.3) -> PredictOutput:
    """
    Build one face mask per detected face from mediapipe's ``FaceMesh``.

    Parameters
    ----------
    image: Input picture. It is converted to RGB before detection, so
        images in other modes (e.g. RGBA, L, P) are accepted as well.
    confidence: Passed to ``FaceMesh`` as ``min_detection_confidence``.

    Returns
    -------
    PredictOutput: An empty ``PredictOutput()`` when no face landmarks are
        found. Otherwise ``masks`` holds one "L" image per face with the
        convex hull of that face's landmarks filled white on black,
        ``bboxes`` comes from ``create_bbox_from_mask`` and ``preview`` is
        the RGB picture with the mesh tesselation drawn onto it.
    """
    mp_face_mesh = mp.solutions.face_mesh
    draw_util = mp.solutions.drawing_utils
    drawing_styles = mp.solutions.drawing_styles

    w, h = image.size

    # mediapipe only accepts three-channel RGB arrays; feeding it an RGBA,
    # grayscale or palette image raises inside ``process``. Normalising the
    # mode here is a no-op (plain copy) for images that are already RGB.
    arr = np.array(image.convert("RGB"))

    with mp_face_mesh.FaceMesh(
        static_image_mode=True, max_num_faces=20, min_detection_confidence=confidence
    ) as face_mesh:
        pred = face_mesh.process(arr)

    if pred.multi_face_landmarks is None:
        return PredictOutput()

    # Draw on a copy so the array that was fed to the detector stays intact.
    preview_array = arr.copy()
    masks = []

    for landmarks in pred.multi_face_landmarks:
        draw_util.draw_landmarks(
            image=preview_array,
            landmark_list=landmarks,
            connections=mp_face_mesh.FACEMESH_TESSELATION,
            landmark_drawing_spec=None,
            connection_drawing_spec=drawing_styles.get_default_face_mesh_tesselation_style(),
        )

        # Landmark coordinates are scaled by the image size to get pixels.
        points = np.array([(land.x * w, land.y * h) for land in landmarks.landmark])
        outline = get_convexhull(points)

        mask = Image.new("L", image.size, "black")
        draw = ImageDraw.Draw(mask)
        draw.polygon(outline, fill="white")
        masks.append(mask)

    bboxes = create_bbox_from_mask(masks, image.size)
    preview = Image.fromarray(preview_array)
    return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)
def mediapipe_face_mesh_eyes_only(
    image: Image.Image, confidence: float = 0.3
) -> PredictOutput:
    """
    Build one eyes-only mask per detected face from mediapipe's ``FaceMesh``.

    Parameters
    ----------
    image: Input picture. It is converted to RGB before detection, so
        images in other modes (e.g. RGBA, L, P) are accepted as well.
    confidence: Passed to ``FaceMesh`` as ``min_detection_confidence``.

    Returns
    -------
    PredictOutput: An empty ``PredictOutput()`` when no face landmarks are
        found. Otherwise ``masks`` holds one "L" image per face with the
        convex hulls of the left-eye and right-eye landmarks filled white
        on black, ``bboxes`` comes from ``create_bbox_from_mask`` and
        ``preview`` is produced by ``draw_preview`` on an RGB copy of the
        input.
    """
    mp_face_mesh = mp.solutions.face_mesh

    # Flattened landmark indices for each eye (duplicates are harmless for
    # the hull). Presumably the constants are sets of index pairs — verify
    # against the mediapipe version in use.
    left_idx = np.array(list(mp_face_mesh.FACEMESH_LEFT_EYE)).flatten()
    right_idx = np.array(list(mp_face_mesh.FACEMESH_RIGHT_EYE)).flatten()

    w, h = image.size

    # mediapipe only accepts three-channel RGB arrays; feeding it an RGBA,
    # grayscale or palette image raises inside ``process``. ``convert``
    # always returns a new image, so the caller's picture is never touched.
    rgb_image = image.convert("RGB")
    arr = np.array(rgb_image)

    with mp_face_mesh.FaceMesh(
        static_image_mode=True, max_num_faces=20, min_detection_confidence=confidence
    ) as face_mesh:
        pred = face_mesh.process(arr)

    if pred.multi_face_landmarks is None:
        return PredictOutput()

    # The converted copy doubles as the canvas for the preview overlay.
    preview = rgb_image
    masks = []

    for landmarks in pred.multi_face_landmarks:
        # Landmark coordinates are scaled by the image size to get pixels.
        points = np.array([(land.x * w, land.y * h) for land in landmarks.landmark])
        left_outline = get_convexhull(points[left_idx])
        right_outline = get_convexhull(points[right_idx])

        # Both eyes of one face share a single mask.
        mask = Image.new("L", image.size, "black")
        draw = ImageDraw.Draw(mask)
        for outline in (left_outline, right_outline):
            draw.polygon(outline, fill="white")
        masks.append(mask)

    bboxes = create_bbox_from_mask(masks, image.size)
    preview = draw_preview(preview, bboxes, masks)
    return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)
def draw_preview(
    preview: Image.Image, bboxes: list[list[int]], masks: list[Image.Image]
) -> Image.Image:
    """
    Overlay the masks and bounding boxes on a preview picture.

    Parameters
    ----------
    preview: Picture to annotate.
    bboxes: Boxes to outline; each one is handed to ``draw.rectangle``.
    masks: Masks whose selected regions are tinted red.

    Returns
    -------
    Image.Image: The annotated picture. Each mask step produces a new
        image; the rectangles are drawn in place on the last one, which is
        the ``preview`` argument itself when ``masks`` is empty.
    """
    solid_red = Image.new("RGB", preview.size, "red")

    # Tint one mask at a time: paint the masked region solid red, then mix
    # that 25% into the running preview so the picture stays visible.
    for region in masks:
        preview = Image.blend(
            preview, Image.composite(solid_red, preview, region), 0.25
        )

    pen = ImageDraw.Draw(preview)
    for box in bboxes:
        pen.rectangle(box, outline="red", width=2)

    return preview
|