# Hugging Face Space: FrameNetBrasil/Yolo11x_FM30k_Event_withcaption
# Space status: Sleeping
# File size: 2,152 Bytes
# Commit: aa3bb8e

import cv2
import gradio as gr
from ultralytics import YOLO

# Load the model once at import time; detect_and_visualize() reuses this
# module-level instance for every request instead of constructing a new one.
MODEL_PATH = "best.pt"  # relative path passed to YOLO(); presumably the trained weights file - confirm
model = YOLO(MODEL_PATH)

def detect_and_visualize(image):
    """Run YOLO detection on an image and draw the results on a copy of it.

    Args:
        image: Image as a NumPy array in RGB channel order (what the
            ``gr.Image(type="numpy")`` input of this app supplies), or
            ``None`` when no image was provided.

    Returns:
        A ``(annotated_image, detections)`` tuple. ``annotated_image`` is a
        copy of ``image`` with one green rectangle and a
        ``"<class> <confidence>"`` label per detection; the input array is
        not modified. ``detections`` is a list of JSON-serialisable dicts
        with the keys ``label``, ``confidence`` and ``bounding_box``
        (``x1``, ``y1``, ``x2``, ``y2`` as integer pixel coordinates).
        For a ``None`` input the result is ``(None, [])``.
    """
    # Gradio invokes the handler with None when the form is submitted without
    # an image; return an empty result instead of crashing on image.copy().
    if image is None:
        return None, []

    # Ultralytics interprets NumPy input as BGR (OpenCV convention), whereas
    # Gradio supplies RGB. Convert for inference only, so the model does not
    # see red and blue swapped. Other layouts are passed through unchanged.
    if image.ndim == 3 and image.shape[2] == 3:
        inference_input = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    else:
        inference_input = image
    results = model(inference_input)

    # Draw on a copy of the original (RGB) array so the caller's image is
    # left untouched and the output keeps the channel order Gradio expects.
    annotated_image = image.copy()

    # Fixed box/label colour; green is (0, 255, 0) in both RGB and BGR.
    color = (0, 255, 0)

    detections = []
    for result in results:
        boxes = result.boxes.xyxy.cpu().numpy()
        confidences = result.boxes.conf.cpu().numpy()
        class_ids = result.boxes.cls.cpu().numpy().astype(int)

        for box, confidence, class_id in zip(boxes, confidences, class_ids):
            x_min, y_min, x_max, y_max = map(int, box)
            class_name = model.names[int(class_id)]

            cv2.rectangle(annotated_image, (x_min, y_min), (x_max, y_max), color, 2)

            # Put the label above the box when there is room; otherwise place
            # it just inside the top edge so it is not clipped by the image
            # border.
            label_y = y_min - 10 if y_min >= 20 else y_min + 15
            label = f"{class_name} {confidence:.2f}"
            cv2.putText(
                annotated_image,
                label,
                (x_min, label_y),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                color,
                2,
            )

            detections.append({
                "label": class_name,
                # Plain Python float so the value is JSON-serialisable.
                "confidence": float(confidence),
                "bounding_box": {
                    "x1": x_min,
                    "y1": y_min,
                    "x2": x_max,
                    "y2": y_max,
                },
            })

    return annotated_image, detections

def gradio_interface(image):
    """Gradio callback: forward the uploaded image to the detector.

    Returns the annotated image and the list of detection records, in the
    order the interface's two output components expect them.
    """
    annotated, details = detect_and_visualize(image)
    return (annotated, details)

# Web UI definition: a single image input wired to gradio_interface, whose two
# return values feed the two output components in the order listed below.
interface = gr.Interface(
    fn=gradio_interface,
    # type="numpy" makes Gradio pass the upload to the callback as a NumPy array.
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=[
        # First return value: the image with boxes and labels drawn on it.
        gr.Image(type="numpy", label="Annotated Image"),
        # Second return value: the list of per-detection dicts, shown as JSON.
        gr.JSON(label="Detection Details")
    ],
    title="YOLO Object Detection",
    description="Upload an image to detect objects and view annotated results along with detailed detection data."
)

# Start the Gradio server only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    interface.launch()