#!/usr/bin/env python

from __future__ import annotations

import cv2
import gradio as gr
import huggingface_hub
import insightface
import numpy as np
import onnxruntime as ort
from PIL import Image

# Heading and description text shown by the Gradio interface defined below.
TITLE = "insightface Person Detection"
DESCRIPTION = "https://github.com/deepinsight/insightface/tree/master/examples/person_detection"

def load_model():
    """Build the person detector backed by an ONNX Runtime session.

    Fetches ``models/scrfd_person_2.5g.onnx`` from the
    ``public-data/insightface`` Hugging Face Hub repository, opens it in an
    ONNX Runtime inference session and wraps that session in insightface's
    ``RetinaFace`` model class.

    Returns:
        The constructed ``RetinaFace`` instance. ``prepare`` is not called
        here; that is left to the caller.
    """
    model_path = huggingface_hub.hf_hub_download("public-data/insightface", "models/scrfd_person_2.5g.onnx")

    session_options = ort.SessionOptions()
    # Same thread budget for work inside a single operator and across operators.
    session_options.intra_op_num_threads = 8
    session_options.inter_op_num_threads = 8

    # ONNX Runtime reads this list in decreasing priority, so CPU is preferred.
    execution_providers = ["CPUExecutionProvider", "CUDAExecutionProvider"]
    ort_session = ort.InferenceSession(model_path, sess_options=session_options, providers=execution_providers)

    return insightface.model_zoo.retinaface.RetinaFace(model_file=model_path, session=ort_session)

def detect_person(
    img: np.ndarray, detector: insightface.model_zoo.retinaface.RetinaFace
) -> tuple[np.ndarray, np.ndarray]:
    """Run ``detector`` on ``img`` and return two sets of integer boxes.

    Args:
        img: Image array; only ``img.shape[0]`` (height) and ``img.shape[1]``
            (width) are read here, the array itself is handed to the detector.
        detector: Object whose ``detect(img)`` returns ``(bboxes, kpss)``.
            The code indexes ``bboxes`` as 2-D with at least four columns and
            ``kpss`` as 3-D with at least five points of ``(x, y)`` each.

    Returns:
        ``(bboxes, vbboxes)``, both integer arrays with four columns.
        ``bboxes`` holds the first four detector box columns rounded to the
        nearest integer. ``vbboxes`` is built from keypoint 0 (columns 0-1)
        and keypoint 4 (columns 2-3) after rounding and clamping them to
        ``[0, width]`` / ``[0, height]``.
    """
    height, width = img.shape[0], img.shape[1]

    raw_boxes, raw_keypoints = detector.detect(img)

    # Drop any trailing columns (e.g. a score) and snap to whole pixels.
    bboxes = raw_boxes[:, :4].round().astype(int)

    # Snap keypoints to whole pixels and keep them inside the image extent.
    keypoints = raw_keypoints.round().astype(int)
    keypoints[:, :, 0] = keypoints[:, :, 0].clip(0, width)
    keypoints[:, :, 1] = keypoints[:, :, 1].clip(0, height)

    # Keypoint 0 supplies (x1, y1) and keypoint 4 supplies (x2, y2).
    vbboxes = np.concatenate([keypoints[:, 0, :2], keypoints[:, 4, :2]], axis=1)
    return bboxes, vbboxes

def visualize(image: np.ndarray, bboxes: np.ndarray, vbboxes: np.ndarray) -> list[Image.Image]:
    """Crop every detected box out of ``image`` and return the crops.

    Box coordinates are clamped to the image extent before slicing. Without
    the clamp, a negative coordinate would be read by NumPy as an offset from
    the far edge, so a box hanging over the left or top border would yield an
    empty or unrelated crop. Boxes with no area left after clamping are
    skipped.

    Args:
        image: Image array indexed as ``[row, column, ...]``.
        bboxes: Integer array with one ``(x1, y1, x2, y2)`` row per detection.
            It is not modified.
        vbboxes: Accepted for call compatibility; not used.

    Returns:
        A list of PIL images, one per box that still has positive area after
        clamping, in the same order as ``bboxes``.
    """
    height, width = image.shape[:2]

    # Work on a copy so the caller's array keeps the raw detector output.
    clipped = bboxes.copy()
    clipped[:, [0, 2]] = np.clip(bboxes[:, [0, 2]], 0, width)
    clipped[:, [1, 3]] = np.clip(bboxes[:, [1, 3]], 0, height)

    person_images = []
    for x1, y1, x2, y2 in clipped:
        # Nothing of this box lies inside the image (or it is degenerate).
        if x2 <= x1 or y2 <= y1:
            continue
        person_images.append(Image.fromarray(image[y1:y2, x1:x2]))

    return person_images

# Build the detector once at import time; `detect` below reuses this instance.
detector = load_model()
# The first argument (-1) is presumably insightface's ctx_id, where a negative
# value selects CPU execution — TODO confirm against insightface.
# NMS threshold is 0.5 and the detector input size is 640x640.
detector.prepare(-1, nms_thresh=0.5, input_size=(640, 640))

def detect(image: np.ndarray) -> list[np.ndarray]:
    """Gradio callback: return one cropped image per detected person.

    The detector is fed a BGR view of the input, but the crops are cut from
    the original RGB array. Cropping from the BGR view would hand
    channel-swapped images back to the gallery.

    Args:
        image: RGB image array from the Gradio image input, or ``None`` when
            no image was supplied.

    Returns:
        A list of RGB arrays, one per detected person; empty when ``image``
        is ``None`` or nothing was detected.
    """
    if image is None:
        return []

    bgr_image = image[:, :, ::-1]  # RGB -> BGR, used for detection only
    bboxes, vbboxes = detect_person(bgr_image, detector)

    # Crop from the untouched RGB input so the output colours are correct.
    person_images = visualize(image, bboxes, vbboxes)

    # visualize returns PIL images; convert them to arrays for the gallery.
    return [np.array(img) for img in person_images]

# Gradio UI: a single image input (delivered to `detect` as a numpy array)
# whose returned person crops are displayed in a gallery.
demo = gr.Interface(
    fn=detect,
    inputs=gr.Image(label="Input", type="numpy"),
    outputs=gr.Gallery(label="Detected Persons"),
    title=TITLE,
    description=DESCRIPTION,
)

if __name__ == "__main__":
    # Enable request queuing (max_size=10) and start the app when run as a script.
    demo.queue(max_size=10).launch()