#!/usr/bin/env python
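
"""Gradio demo for insightface person detection: runs the SCRFD person
detector on an input image and shows each detected person as a crop."""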

from __future__ import annotations

import gradio as gr
import huggingface_hub
import insightface
import numpy as np
import onnxruntime as ort

TITLE = "insightface Person Detection"
DESCRIPTION = "https://github.com/deepinsight/insightface/tree/master/examples/person_detection"

def load_model():
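    # Fetch the SCRFD person-detection ONNX weights from the Hugging Face Hub
    # (hf_hub_download caches the file locally after the first call).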
    path = huggingface_hub.hf_hub_download("public-data/insightface", "models/scrfd_person_2.5g.onnx")
    options = ort.SessionOptions()
    options.intra_op_num_threads = 8
    options.inter_op_num_threads = 8
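    # onnxruntime tries providers in listing order, so CPU takes precedence
    # here and CUDA only serves as a fallback.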
    session = ort.InferenceSession(
        path, sess_options=options, providers=["CPUExecutionProvider", "CUDAExecutionProvider"]
    )
    model = insightface.model_zoo.retinaface.RetinaFace(model_file=path, session=session)
    return model

def detect_person(
    img: np.ndarray, detector: insightface.model_zoo.retinaface.RetinaFace
) -> tuple[np.ndarray, np.ndarray]:
    bboxes, kpss = detector.detect(img)
    # Round boxes and keypoints to integer pixel coordinates.
    bboxes = np.round(bboxes[:, :4]).astype(int)
    kpss = np.round(kpss).astype(int)
    # Clamp keypoints to the image bounds (x to width, y to height).
    kpss[:, :, 0] = np.clip(kpss[:, :, 0], 0, img.shape[1])
    kpss[:, :, 1] = np.clip(kpss[:, :, 1], 0, img.shape[0])
    # Build "visible" boxes spanned by the first and last keypoints.
    vbboxes = bboxes.copy()
    vbboxes[:, 0] = kpss[:, 0, 0]
    vbboxes[:, 1] = kpss[:, 0, 1]
    vbboxes[:, 2] = kpss[:, 4, 0]
    vbboxes[:, 3] = kpss[:, 4, 1]
    return bboxes, vbboxes

def visualize(image: np.ndarray, bboxes: np.ndarray, vbboxes: np.ndarray) -> list[np.ndarray]:
    # Crop each detected person out of the image. The visible-region boxes
    # (vbboxes) are computed upstream but not used by this cropping step.
    height, width = image.shape[:2]
    person_images = []
    for x1, y1, x2, y2 in bboxes:
        # Clamp to the frame; detections can extend past the image border,
        # and negative indices would slice from the wrong end.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, width), min(y2, height)
        person_images.append(image[y1:y2, x1:x2])
    return person_images

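# Load the detector once at import time. ctx_id=-1 requests CPU inference;
# inputs are resized to 640x640 and NMS uses an IoU threshold of 0.5.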
detector = load_model()
detector.prepare(-1, nms_thresh=0.5, input_size=(640, 640))

def detect(image: np.ndarray) -> list[np.ndarray]:
    if image is None:
        return []

    bgr = image[:, :, ::-1]  # Gradio supplies RGB; the detector expects BGR
    bboxes, vbboxes = detect_person(bgr, detector)
    # Crop from the original RGB image so the gallery colors come out right.
    return visualize(image, bboxes, vbboxes)
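
# A quick smoke test (assumes a local image; "people.jpg" is a hypothetical path):
#
#   import cv2
#   rgb = cv2.imread("people.jpg")[:, :, ::-1]  # BGR -> RGB, as Gradio supplies
#   crops = detect(rgb)
#   print(f"detected {len(crops)} person(s)")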

demo = gr.Interface(
    fn=detect,
    inputs=gr.Image(label="Input", type="numpy"),
    outputs=gr.Gallery(label="Detected Persons"),
    title=TITLE,
    description=DESCRIPTION,
)

if __name__ == "__main__":
    demo.queue(max_size=10).launch()