adt2

File size: 5,759 Bytes

187d856

from __future__ import annotations

from functools import partial

import mediapipe as mp
import numpy as np
from PIL import Image, ImageDraw

from adetailer import PredictOutput
from adetailer.common import create_bbox_from_mask, create_mask_from_bbox


def mediapipe_predict(
    model_type: str, image: Image.Image, confidence: float = 0.3
) -> PredictOutput:
    mapping = {
        "mediapipe_face_short": partial(mediapipe_face_detection, 0),
        "mediapipe_face_full": partial(mediapipe_face_detection, 1),
        "mediapipe_face_mesh": mediapipe_face_mesh,
        "mediapipe_face_mesh_eyes_only": mediapipe_face_mesh_eyes_only,
    }
    if model_type in mapping:
        func = mapping[model_type]
        return func(image, confidence)
    msg = f"[-] ADetailer: Invalid mediapipe model type: {model_type}, Available: {list(mapping.keys())!r}"
    raise RuntimeError(msg)


def mediapipe_face_detection(
    model_type: int, image: Image.Image, confidence: float = 0.3
) -> PredictOutput:
    img_width, img_height = image.size

    mp_face_detection = mp.solutions.face_detection
    draw_util = mp.solutions.drawing_utils

    img_array = np.array(image)

    with mp_face_detection.FaceDetection(
        model_selection=model_type, min_detection_confidence=confidence
    ) as face_detector:
        pred = face_detector.process(img_array)

    if pred.detections is None:
        return PredictOutput()

    preview_array = img_array.copy()

    bboxes = []
    for detection in pred.detections:
        draw_util.draw_detection(preview_array, detection)

        bbox = detection.location_data.relative_bounding_box
        x1 = bbox.xmin * img_width
        y1 = bbox.ymin * img_height
        w = bbox.width * img_width
        h = bbox.height * img_height
        x2 = x1 + w
        y2 = y1 + h

        bboxes.append([x1, y1, x2, y2])

    masks = create_mask_from_bbox(bboxes, image.size)
    preview = Image.fromarray(preview_array)

    return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)


def get_convexhull(points: np.ndarray) -> list[tuple[int, int]]:
    """
    Parameters
    ----------
      points: An ndarray of shape (n, 2) containing the 2D points.

    Returns
    -------
      list[tuple[int, int]]: Input for the draw.polygon function
    """
    from scipy.spatial import ConvexHull

    hull = ConvexHull(points)
    vertices = hull.vertices
    return list(zip(points[vertices, 0], points[vertices, 1]))


def mediapipe_face_mesh(image: Image.Image, confidence: float = 0.3) -> PredictOutput:
    mp_face_mesh = mp.solutions.face_mesh
    draw_util = mp.solutions.drawing_utils
    drawing_styles = mp.solutions.drawing_styles

    w, h = image.size

    with mp_face_mesh.FaceMesh(
        static_image_mode=True, max_num_faces=20, min_detection_confidence=confidence
    ) as face_mesh:
        arr = np.array(image)
        pred = face_mesh.process(arr)

        if pred.multi_face_landmarks is None:
            return PredictOutput()

        preview = arr.copy()
        masks = []

        for landmarks in pred.multi_face_landmarks:
            draw_util.draw_landmarks(
                image=preview,
                landmark_list=landmarks,
                connections=mp_face_mesh.FACEMESH_TESSELATION,
                landmark_drawing_spec=None,
                connection_drawing_spec=drawing_styles.get_default_face_mesh_tesselation_style(),
            )

            points = np.array([(land.x * w, land.y * h) for land in landmarks.landmark])
            outline = get_convexhull(points)

            mask = Image.new("L", image.size, "black")
            draw = ImageDraw.Draw(mask)
            draw.polygon(outline, fill="white")
            masks.append(mask)

        bboxes = create_bbox_from_mask(masks, image.size)
        preview = Image.fromarray(preview)
        return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)


def mediapipe_face_mesh_eyes_only(
    image: Image.Image, confidence: float = 0.3
) -> PredictOutput:
    mp_face_mesh = mp.solutions.face_mesh

    left_idx = np.array(list(mp_face_mesh.FACEMESH_LEFT_EYE)).flatten()
    right_idx = np.array(list(mp_face_mesh.FACEMESH_RIGHT_EYE)).flatten()

    w, h = image.size

    with mp_face_mesh.FaceMesh(
        static_image_mode=True, max_num_faces=20, min_detection_confidence=confidence
    ) as face_mesh:
        arr = np.array(image)
        pred = face_mesh.process(arr)

        if pred.multi_face_landmarks is None:
            return PredictOutput()

        preview = image.copy()
        masks = []

        for landmarks in pred.multi_face_landmarks:
            points = np.array([(land.x * w, land.y * h) for land in landmarks.landmark])
            left_eyes = points[left_idx]
            right_eyes = points[right_idx]
            left_outline = get_convexhull(left_eyes)
            right_outline = get_convexhull(right_eyes)

            mask = Image.new("L", image.size, "black")
            draw = ImageDraw.Draw(mask)
            for outline in (left_outline, right_outline):
                draw.polygon(outline, fill="white")
            masks.append(mask)

        bboxes = create_bbox_from_mask(masks, image.size)
        preview = draw_preview(preview, bboxes, masks)
        return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)


def draw_preview(
    preview: Image.Image, bboxes: list[list[int]], masks: list[Image.Image]
) -> Image.Image:
    red = Image.new("RGB", preview.size, "red")
    for mask in masks:
        masked = Image.composite(red, preview, mask)
        preview = Image.blend(preview, masked, 0.25)

    draw = ImageDraw.Draw(preview)
    for bbox in bboxes:
        draw.rectangle(bbox, outline="red", width=2)

    return preview