# Spaces: sam749/YuNet-face-detection (Running)
# File size: 3,990 Bytes
# b03e0d7

import os
import cv2
import cv2 as cv
import numpy as np
import gradio as gr
from yunet import YuNet


# Valid combinations of backends and targets.
# Each entry is a [backend, target] pair of OpenCV DNN constants.
# NOTE(review): nothing in the visible part of this file reads this table;
# ImageResizer is constructed with its default backend_id=0 / target_id=0.
# NOTE(review): the TIMVX and CANN constants presumably require a recent
# OpenCV build — confirm against the deployed cv2 version.
backend_target_pairs = [
    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU],
]


class ImageResizer:
    """Resize an image and crop a square window around the first detected face.

    Face detection is delegated to a ``YuNet`` model stored in ``self.model``.
    """

    def __init__(
        self,
        modelPath,
        input_size=(320, 320),
        conf_threshold=0.6,
        nms_threshold=0.3,
        top_k=5000,
        backend_id=0,
        target_id=0,
    ):
        """Build the underlying ``YuNet`` detector.

        Every argument is forwarded unchanged to ``YuNet`` under its
        camelCase keyword name (``modelPath``, ``inputSize``,
        ``confThreshold``, ``nmsThreshold``, ``topK``, ``backendId``,
        ``targetId``).
        """
        self.model = YuNet(
            modelPath=modelPath,
            inputSize=input_size,
            confThreshold=conf_threshold,
            nmsThreshold=nms_threshold,
            topK=top_k,
            backendId=backend_id,
            targetId=target_id,
        )

    def detect(self, image, num_faces=None):
        """Detect faces in *image* and draw a rectangle around each one.

        The rectangles are drawn on *image* in place, so pass a copy when
        the caller's array must stay untouched.

        Args:
            image: Array whose first two dimensions are height and width,
                or ``None``.
            num_faces: When truthy, only the first ``num_faces`` detections
                are used; otherwise all of them.

        Returns:
            ``(image, bboxes)`` where ``bboxes`` is a list of int32 arrays
            ``[x, y, w, h]``. For a ``None`` image the result is
            ``(None, [])`` so callers can always unpack two values.
        """
        if image is None:
            return None, []

        img_h, img_w = image.shape[:2]

        # The detector must be told the size of the frame it is about to see.
        self.model.setInputSize([img_w, img_h])
        results = self.model.infer(image)

        faces = results[:num_faces] if num_faces else results

        bboxes = []
        for face in faces:
            bbox = face[0:4].astype(np.int32)  # x, y, w, h
            # Plain Python ints for the drawing call; the int32 array itself
            # is what gets returned to the caller.
            x, y, w, h = (int(v) for v in bbox)
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
            bboxes.append(bbox)

        return image, bboxes

    def resize(self, image, target_size=512, above_head_ratio=0.5):
        """Scale *image* so its shorter side is *target_size*, then crop a square.

        The crop is positioned around the first detected face; when no face
        is found a box of ``target_size`` x ``target_size`` at the origin is
        used instead. The crop window is shifted back inside the resized
        frame when the face is close to an edge, so the crop keeps its full
        ``target_size`` extent whenever the frame is large enough.

        Args:
            image: Array whose first two dimensions are height and width,
                or ``None``.
            target_size: Length of the shorter side after scaling and of the
                crop's sides. Numeric strings are accepted; a falsy value
                means 512.
            above_head_ratio: Fraction of ``target_size`` kept above the
                vertical centre of the face box.

        Returns:
            ``(detection_image, cropped_image)``: a copy of the resized image
            with the face rectangles drawn on it, and the crop taken from the
            undecorated resized image. ``(None, None)`` when *image* is
            ``None``.

        Raises:
            ValueError: If *target_size* cannot be read as a positive integer.
        """
        if image is None:
            return None, None

        target_size = self._normalize_target_size(target_size)

        height, width = image.shape[:2]
        ar = width / height
        if ar > 1:
            # Landscape: height is the shorter side.
            new_width, new_height = int(target_size * ar), target_size
        elif ar < 1:
            # Portrait: width is the shorter side.
            new_width, new_height = target_size, int(target_size / ar)
        else:
            # Square
            new_width = new_height = target_size

        resized = cv2.resize(
            image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
        )

        # Detect on a copy so the crop below stays free of drawn rectangles.
        dt_image, bboxes = self.detect(resized.copy())

        if bboxes:
            face_box = tuple(int(v) for v in bboxes[0])
        else:
            face_box = (0, 0, target_size, target_size)

        frame_height, frame_width = resized.shape[:2]
        left, top, right, bottom = self._crop_box(
            face_box, frame_width, frame_height, target_size, above_head_ratio
        )

        cropped_image = resized[top:bottom, left:right]
        return dt_image, cropped_image

    @staticmethod
    def _normalize_target_size(target_size):
        """Return *target_size* as a positive int, using 512 for falsy input."""
        if not target_size:
            return 512
        try:
            size = int(float(target_size))
        except (TypeError, ValueError, OverflowError) as err:
            raise ValueError(
                f"target_size must be a positive integer, got {target_size!r}"
            ) from err
        if size <= 0:
            raise ValueError(
                f"target_size must be a positive integer, got {target_size!r}"
            )
        return size

    @staticmethod
    def _crop_box(
        face_box, frame_width, frame_height, target_size, above_head_ratio
    ):
        """Return ``(left, top, right, bottom)`` of the crop, kept inside the frame."""
        x, y, w, h = face_box
        x_center = int((x + (x + w)) / 2)
        y_center = int((y + (y + h)) / 2)
        above_head_max = int(target_size * above_head_ratio)

        # Largest origin that still leaves room for a full-size window.
        max_left = max(0, frame_width - target_size)
        max_top = max(0, frame_height - target_size)

        left = min(max(0, x_center - target_size // 2), max_left)
        top = min(max(0, y_center - above_head_max), max_top)
        right = min(left + target_size, frame_width)
        bottom = min(top + target_size, frame_height)
        return left, top, right, bottom


# ONNX model file for the detector, given as a bare (relative) file name.
model_path = "face_detection_yunet_2023mar.onnx"
# Single module-level ImageResizer, created at import time with the default
# detector settings and used by face_detector().
image_resizer = ImageResizer(modelPath=model_path)


def face_detector(input_image, target_size=512):
    """Run the shared ``image_resizer`` on *input_image*.

    Returns whatever ``image_resizer.resize`` returns for the given image
    and target size.
    """
    resize_outputs = image_resizer.resize(input_image, target_size=target_size)
    return resize_outputs


# Input components, listed in the order face_detector() takes its arguments:
# the image to process, then the target size.
inputs = [
    gr.Image(sources=["upload", "clipboard"], type="numpy"),
    gr.Dropdown(
        choices=[512, 768, 1024],
        value=512,
        # NOTE(review): a typed-in custom value may arrive as a string rather
        # than an int — confirm the callback copes with that.
        allow_custom_value=True,
        info="Target size of images",
    ),
]
# Output components for the two values face_detector() returns: the image
# with detections drawn on it, then the cropped image.
outputs = [
    gr.Image(label="face detection", format="JPEG"),
    gr.Image(label="focused resized", format="JPEG"),
]
# Wire the callback and the components into a single-function interface.
demo = gr.Interface(
    fn=face_detector,
    inputs=inputs,
    outputs=outputs,
    title="Image Resizer",
    theme="gradio/monochrome",
    api_name="resize",
    submit_btn=gr.Button("Resize", variant="primary"),
    allow_flagging="never",
)
# Configure the request queue with max_size=10.
demo.queue(
    max_size=10,
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()