File size: 3,320 Bytes
516d9b1
 
c034823
516d9b1
 
 
 
 
 
5ab0afc
516d9b1
bef99af
 
516d9b1
c034823
516d9b1
bef99af
516d9b1
 
 
08ba7c3
550f163
bef99af
 
516d9b1
 
c034823
516d9b1
 
70eacb3
 
bfe786b
70eacb3
 
 
 
 
 
 
 
 
 
c034823
70eacb3
 
 
 
 
 
 
2c61af9
 
 
 
 
 
 
 
 
 
 
c034823
2c61af9
 
 
 
 
c034823
2c61af9
 
 
c034823
2c61af9
 
 
c034823
2c61af9
 
 
 
 
 
 
c034823
2c61af9
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from __future__ import annotations
import pathlib
import io
import cv2
import gradio as gr
import huggingface_hub
import insightface
import numpy as np
import onnxruntime as ort
from PIL import Image

TITLE = "insightface Person Detection"
DESCRIPTION = "https://github.com/deepinsight/insightface/tree/master/examples/person_detection"


def load_model():
    """Download the SCRFD person-detection ONNX weights and return a ready model.

    The weights are fetched from the ``public-data/insightface`` Hugging Face
    repo and wrapped in insightface's RetinaFace runner backed by a CPU-only
    ONNX Runtime session.
    """
    weights = huggingface_hub.hf_hub_download(
        "public-data/insightface", "models/scrfd_person_2.5g.onnx"
    )
    # Cap both intra- and inter-op parallelism at 8 threads.
    opts = ort.SessionOptions()
    opts.intra_op_num_threads = 8
    opts.inter_op_num_threads = 8
    session = ort.InferenceSession(
        weights, sess_options=opts, providers=["CPUExecutionProvider"]
    )
    return insightface.model_zoo.retinaface.RetinaFace(
        model_file=weights, session=session
    )


def detect_person(
    img: np.ndarray, detector: insightface.model_zoo.retinaface.RetinaFace
) -> tuple[np.ndarray, np.ndarray]:
    """Run *detector* on *img* and return integer pixel boxes.

    Returns a pair ``(bboxes, vbboxes)`` of ``(N, 4)`` int arrays in
    ``[x1, y1, x2, y2]`` order: the full body boxes and the boxes spanned
    by the first and fifth keypoint of each detection.
    """
    raw_boxes, raw_kpss = detector.detect(img)
    # Drop the trailing confidence column and snap boxes to whole pixels.
    bboxes = np.round(raw_boxes[:, :4]).astype(int)
    kpss = np.round(raw_kpss).astype(int)
    # Keep keypoints inside the frame: x within width, y within height.
    height, width = img.shape[:2]
    kpss[:, :, 0] = np.clip(kpss[:, :, 0], 0, width)
    kpss[:, :, 1] = np.clip(kpss[:, :, 1], 0, height)
    # Visible-region box: keypoint 0 gives the top-left corner, keypoint 4
    # the bottom-right — presumably the model's visible-body landmarks.
    vbboxes = np.hstack((kpss[:, 0, :], kpss[:, 4, :]))
    return bboxes, vbboxes


def visualize(image: np.ndarray, bboxes: np.ndarray, vbboxes: np.ndarray) -> np.ndarray:
    """Draw detections on a copy of *image* and return the annotated copy.

    Each body box gets a thin (0, 255, 0) outline; each visible-region box
    is alpha-blended toward (255, 0, 0) and marked with a dot at every corner.
    """
    canvas = image.copy()
    alpha = 0.8
    tint = (255, 0, 0)
    for (x1, y1, x2, y2), (vx1, vy1, vx2, vy2) in zip(bboxes, vbboxes):
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 255, 0), 1)
        # Blend the visible region toward the tint, channel by channel; the
        # float result is cast back into the uint8 image on assignment.
        for c in range(3):
            canvas[vy1:vy2, vx1:vx2, c] = canvas[vy1:vy2, vx1:vx2, c] * alpha + tint[c] * (1.0 - alpha)
        # Corner dots, in (vx1,vy1), (vx1,vy2), (vx2,vy1), (vx2,vy2) order.
        for cx in (vx1, vx2):
            for cy in (vy1, vy2):
                cv2.circle(canvas, (cx, cy), 1, tint, 2)
    return canvas


def extract_persons(image: np.ndarray, bboxes: np.ndarray) -> list[Image.Image]:
    """Crop each detected person out of *image* and return the crops as PIL images.

    Parameters
    ----------
    image : HxWx3 uint8 array to crop from.
    bboxes : (N, 4) int array of ``[x1, y1, x2, y2]`` boxes.

    Returns
    -------
    One RGB ``PIL.Image`` per valid box; degenerate or fully out-of-frame
    boxes are skipped.
    """
    height, width = image.shape[:2]
    person_images = []
    for bbox in bboxes:
        x1, y1, x2, y2 = bbox
        # Clamp to the image bounds: the detector can emit coordinates that
        # are negative (which would wrap around under Python slicing) or
        # beyond the frame.
        x1, y1 = max(int(x1), 0), max(int(y1), 0)
        x2, y2 = min(int(x2), width), min(int(y2), height)
        if x2 <= x1 or y2 <= y1:
            # Empty crop — Image.fromarray would fail on a zero-size array.
            continue
        person_image = image[y1:y2, x1:x2]  # Crop the detected person
        person_pil_image = Image.fromarray(person_image).convert('RGB')  # Convert to RGB
        person_images.append(person_pil_image)
    return person_images


# Build the detector once at import time so every request reuses the session.
detector = load_model()
# ctx_id=-1 presumably selects the CPU context (insightface convention —
# consistent with the CPUExecutionProvider above); 640x640 is the inference
# input size, with NMS IoU threshold 0.5.
detector.prepare(-1, nms_thresh=0.5, input_size=(640, 640))


def detect(image: np.ndarray) -> tuple[Image.Image, list[Image.Image]]:
    """Detect persons in an RGB image and return annotated results for Gradio.

    Parameters
    ----------
    image : RGB uint8 array supplied by the Gradio image input.

    Returns
    -------
    ``(annotated, crops)`` — the full annotated frame as an RGB PIL image,
    and a list of per-person crops taken from that annotated frame.
    """
    bgr = image[:, :, ::-1]  # RGB -> BGR: the detector and cv2 drawing expect BGR.
    bboxes, vbboxes = detect_person(bgr, detector)
    annotated_bgr = visualize(bgr, bboxes, vbboxes)
    annotated_rgb = annotated_bgr[:, :, ::-1]  # BGR -> RGB
    # Bug fix: crop from the RGB buffer. The previous code cropped the BGR
    # buffer, so the gallery images had red and blue channels swapped.
    person_images = extract_persons(annotated_rgb, bboxes)
    return Image.fromarray(annotated_rgb, 'RGB'), person_images


# Example images bundled with the app; sorted for a stable gallery order.
examples = sorted(pathlib.Path("images").glob("*.jpg"))

# Gradio UI: one image in, the annotated frame plus a gallery of person crops out.
demo = gr.Interface(
    fn=detect,
    inputs=gr.Image(label="Input", type="numpy"),
    outputs=[gr.Image(label="Processed Image", type="numpy"), gr.Gallery(label="Detected Persons", type="numpy")],
    examples=examples,
    examples_per_page=30,
    title=TITLE,
    description=DESCRIPTION,
)

if __name__ == "__main__":
    # Queue caps concurrent/pending requests since inference is CPU-bound.
    demo.queue(max_size=10).launch()