adt2 / adetailer /mediapipe.py

Upload 36 files

187d856 almost 2 years ago

5.76 kB

	from __future__ import annotations

	from functools import partial

	import mediapipe as mp
	import numpy as np
	from PIL import Image, ImageDraw

	from adetailer import PredictOutput
	from adetailer.common import create_bbox_from_mask, create_mask_from_bbox


	def mediapipe_predict(
	    model_type: str, image: Image.Image, confidence: float = 0.3
	) -> PredictOutput:
	    """
	    Run the mediapipe detector selected by ``model_type`` on ``image``.

	    Parameters
	    ----------
	    model_type: Name of the detector to run. The accepted names are the
	        keys of the dispatch table built below.
	    image: The image handed to the selected detector.
	    confidence: Forwarded unchanged to the selected detector.

	    Returns
	    -------
	    PredictOutput: Whatever the selected detector returns.

	    Raises
	    ------
	    RuntimeError: If ``model_type`` is not one of the known names.
	    """
	    # The two face-detection entries share one implementation; the bound
	    # integer becomes its ``model_type`` argument.
	    detectors = {
	        "mediapipe_face_short": partial(mediapipe_face_detection, 0),
	        "mediapipe_face_full": partial(mediapipe_face_detection, 1),
	        "mediapipe_face_mesh": mediapipe_face_mesh,
	        "mediapipe_face_mesh_eyes_only": mediapipe_face_mesh_eyes_only,
	    }

	    detector = detectors.get(model_type)
	    if detector is None:
	        msg = f"[-] ADetailer: Invalid mediapipe model type: {model_type}, Available: {list(detectors.keys())!r}"
	        raise RuntimeError(msg)

	    return detector(image, confidence)


	def mediapipe_face_detection(
	    model_type: int, image: Image.Image, confidence: float = 0.3
	) -> PredictOutput:
	    """
	    Detect faces with mediapipe's ``FaceDetection`` solution.

	    Parameters
	    ----------
	    model_type: Passed to ``FaceDetection`` as ``model_selection``.
	    image: The image to search for faces.
	    confidence: Passed to ``FaceDetection`` as ``min_detection_confidence``.

	    Returns
	    -------
	    PredictOutput: An empty ``PredictOutput`` when mediapipe reports no
	        detections; otherwise one ``[x1, y1, x2, y2]`` box per detection,
	        the masks built from those boxes, and a preview image with the
	        detections drawn on a copy of the input.
	    """
	    width, height = image.size

	    face_detection = mp.solutions.face_detection
	    drawing = mp.solutions.drawing_utils

	    frame = np.array(image)

	    with face_detection.FaceDetection(
	        model_selection=model_type, min_detection_confidence=confidence
	    ) as detector:
	        result = detector.process(frame)

	    detections = result.detections
	    if detections is None:
	        return PredictOutput()

	    # Draw on a copy so the array fed to the detector stays untouched.
	    annotated = frame.copy()

	    bboxes = []
	    for detection in detections:
	        drawing.draw_detection(annotated, detection)

	        # The relative box is scaled by the image size to get the
	        # coordinates stored in ``bboxes``.
	        rel = detection.location_data.relative_bounding_box
	        left = rel.xmin * width
	        top = rel.ymin * height
	        right = left + rel.width * width
	        bottom = top + rel.height * height

	        bboxes.append([left, top, right, bottom])

	    return PredictOutput(
	        bboxes=bboxes,
	        masks=create_mask_from_bbox(bboxes, image.size),
	        preview=Image.fromarray(annotated),
	    )


	def get_convexhull(points: np.ndarray) -> list[tuple[int, int]]:
	    """
	    Compute the convex hull of a set of 2D points.

	    Parameters
	    ----------
	    points: An ndarray of shape (n, 2) containing the 2D points.

	    Returns
	    -------
	    list[tuple[int, int]]: The hull vertices as (x, y) pairs, in the order
	        scipy reports them, ready to be passed to the draw.polygon function.
	    """
	    # Imported lazily so scipy is only loaded when a hull is requested.
	    from scipy.spatial import ConvexHull

	    # ``vertices`` holds row indices into ``points``; gather those rows once
	    # and keep only the x and y columns.
	    corner_rows = points[ConvexHull(points).vertices]
	    return [(x, y) for x, y in zip(corner_rows[:, 0], corner_rows[:, 1])]


	def mediapipe_face_mesh(image: Image.Image, confidence: float = 0.3) -> PredictOutput:
	    """
	    Build one mask per face from mediapipe's ``FaceMesh`` landmarks.

	    Parameters
	    ----------
	    image: The image to search for faces.
	    confidence: Passed to ``FaceMesh`` as ``min_detection_confidence``.

	    Returns
	    -------
	    PredictOutput: An empty ``PredictOutput`` when mediapipe reports no
	        face landmarks; otherwise one mask per face (the filled convex hull
	        of its landmarks), the boxes derived from those masks, and a
	        preview with the mesh tesselation drawn on a copy of the input.
	    """
	    face_mesh_mod = mp.solutions.face_mesh
	    drawing = mp.solutions.drawing_utils
	    styles = mp.solutions.drawing_styles

	    width, height = image.size
	    frame = np.array(image)

	    with face_mesh_mod.FaceMesh(
	        static_image_mode=True, max_num_faces=20, min_detection_confidence=confidence
	    ) as mesher:
	        result = mesher.process(frame)

	    faces = result.multi_face_landmarks
	    if faces is None:
	        return PredictOutput()

	    # Draw on a copy so the array fed to the model stays untouched.
	    canvas = frame.copy()
	    masks = []

	    for face in faces:
	        drawing.draw_landmarks(
	            image=canvas,
	            landmark_list=face,
	            connections=face_mesh_mod.FACEMESH_TESSELATION,
	            landmark_drawing_spec=None,
	            connection_drawing_spec=styles.get_default_face_mesh_tesselation_style(),
	        )

	        # Landmark x/y are scaled by the image size before taking the hull.
	        coords = np.array([(lm.x * width, lm.y * height) for lm in face.landmark])
	        hull = get_convexhull(coords)

	        # White hull on a black single-channel canvas is the face mask.
	        face_mask = Image.new("L", image.size, "black")
	        ImageDraw.Draw(face_mask).polygon(hull, fill="white")
	        masks.append(face_mask)

	    bboxes = create_bbox_from_mask(masks, image.size)
	    return PredictOutput(bboxes=bboxes, masks=masks, preview=Image.fromarray(canvas))


	def mediapipe_face_mesh_eyes_only(
	    image: Image.Image, confidence: float = 0.3
	) -> PredictOutput:
	    """
	    Build one mask per face covering only the eyes, from ``FaceMesh`` landmarks.

	    Parameters
	    ----------
	    image: The image to search for faces.
	    confidence: Passed to ``FaceMesh`` as ``min_detection_confidence``.

	    Returns
	    -------
	    PredictOutput: An empty ``PredictOutput`` when mediapipe reports no
	        face landmarks; otherwise one mask per face (the filled convex
	        hulls of the left-eye and right-eye landmarks), the boxes derived
	        from those masks, and a preview produced by ``draw_preview``.
	    """
	    face_mesh_mod = mp.solutions.face_mesh

	    # Flatten each eye's connection set into a 1-D array of landmark indices
	    # (left eye first, then right eye).
	    eye_indices = [
	        np.array(list(connections)).flatten()
	        for connections in (
	            face_mesh_mod.FACEMESH_LEFT_EYE,
	            face_mesh_mod.FACEMESH_RIGHT_EYE,
	        )
	    ]

	    width, height = image.size
	    frame = np.array(image)

	    with face_mesh_mod.FaceMesh(
	        static_image_mode=True, max_num_faces=20, min_detection_confidence=confidence
	    ) as mesher:
	        result = mesher.process(frame)

	    faces = result.multi_face_landmarks
	    if faces is None:
	        return PredictOutput()

	    base = image.copy()
	    masks = []

	    for face in faces:
	        # Landmark x/y are scaled by the image size before indexing.
	        coords = np.array([(lm.x * width, lm.y * height) for lm in face.landmark])
	        eye_hulls = [get_convexhull(coords[idx]) for idx in eye_indices]

	        # Both eyes of one face are painted onto the same mask.
	        eyes_mask = Image.new("L", image.size, "black")
	        pen = ImageDraw.Draw(eyes_mask)
	        for hull in eye_hulls:
	            pen.polygon(hull, fill="white")
	        masks.append(eyes_mask)

	    bboxes = create_bbox_from_mask(masks, image.size)
	    return PredictOutput(
	        bboxes=bboxes, masks=masks, preview=draw_preview(base, bboxes, masks)
	    )


	def draw_preview(
	    preview: Image.Image, bboxes: list[list[int]], masks: list[Image.Image]
	) -> Image.Image:
	    """
	    Tint the masked regions red and outline each bounding box.

	    Parameters
	    ----------
	    preview: The image to annotate.
	    bboxes: Boxes to outline, each passed as-is to ``ImageDraw.rectangle``.
	    masks: Masks selecting the regions to tint.

	    Returns
	    -------
	    Image.Image: The annotated image. When ``masks`` is empty no new image
	        is created, so the rectangles are drawn on the ``preview`` object
	        that was passed in.
	    """
	    solid_red = Image.new("RGB", preview.size, "red")

	    # For every mask: paint the masked area solid red, then mix that result
	    # back into the running preview at 25% so the tint stays translucent.
	    for region in masks:
	        preview = Image.blend(
	            preview, Image.composite(solid_red, preview, region), 0.25
	        )

	    pen = ImageDraw.Draw(preview)
	    for box in bboxes:
	        pen.rectangle(box, outline="red", width=2)

	    return preview