"""Gradio demo that runs YOLOv10-OBB detection on an image or a video."""

import contextlib
import os
import shutil
import tempfile

import cv2
import gradio as gr
import numpy as np  # noqa: F401  (kept: other parts of the project may rely on it)
from ultralytics import YOLO

# Model identifiers offered in the UI. Each one maps to the weight file
# "pretrained/<model_id>.pt".
MODEL_IDS = (
    "yolov10n-obb",
    "yolov10s-640-obb",
    "yolov10s-obb",
    "yolov10m-obb",
    "yolov10b-obb",
    "yolov10l-obb",
    "yolov10x-obb",
)

_MODEL_DIR = "pretrained"
_VIDEO_SUFFIX = ".webm"
# Used only when the capture backend reports an FPS of 0 (property unknown).
_FALLBACK_FPS = 30.0

_HEADER_MARKDOWN = """
# YOLOv10 - OBB (Oriented Bounding Box)
for more detail description about this model, please visit [here](https://github.com/hamhanry/YOLOv10-OBB)
"""


def _load_model(model_id):
    """Return a YOLO model for *model_id*.

    Raises:
        ValueError: if *model_id* is not one of ``MODEL_IDS``.
    """
    if model_id not in MODEL_IDS:
        raise ValueError(
            f"Unknown model_id {model_id!r}; expected one of {', '.join(MODEL_IDS)}"
        )
    return YOLO(f"{_MODEL_DIR}/{model_id}.pt")


def _make_temp_file(suffix):
    """Create an empty temporary file and return its path.

    ``tempfile.mkstemp`` creates the file atomically, unlike the deprecated
    ``tempfile.mktemp`` which only returns a name and is open to races.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring filesystem errors."""
    if path is not None:
        with contextlib.suppress(OSError):
            os.remove(path)


def _annotate_video(model, video, image_size, conf_threshold):
    """Run *model* on every frame of *video* and return the annotated file path.

    The input is first copied to a temporary ``.webm`` file, which is removed
    again once processing ends. The returned output file is left on disk so the
    caller can serve it.

    Raises:
        gr.Error: if the video cannot be opened.
    """
    input_path = _make_temp_file(_VIDEO_SUFFIX)
    output_path = None
    capture = None
    writer = None
    succeeded = False
    try:
        shutil.copyfile(video, input_path)

        capture = cv2.VideoCapture(input_path)
        if not capture.isOpened():
            raise gr.Error("Could not open the uploaded video.")

        fps = capture.get(cv2.CAP_PROP_FPS) or _FALLBACK_FPS
        frame_size = (
            int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )

        output_path = _make_temp_file(_VIDEO_SUFFIX)
        writer = cv2.VideoWriter(
            output_path,
            cv2.VideoWriter_fourcc(*"vp90"),
            fps,
            frame_size,
        )

        while True:
            ok, frame = capture.read()
            if not ok:
                break
            # NOTE(review): unlike the image path, no device is forced here, so
            # the library default device is used - confirm this is intended.
            results = model.predict(
                source=frame, imgsz=image_size, conf=conf_threshold
            )
            writer.write(results[0].plot())
        succeeded = True
    finally:
        # Release handles even when prediction fails part-way through.
        if capture is not None:
            capture.release()
        if writer is not None:
            writer.release()
        _remove_quietly(input_path)
        if not succeeded:
            _remove_quietly(output_path)
    return output_path


def inference(image, video, model_id, image_size, conf_threshold):
    """Run detection on *image* if given, otherwise on *video*.

    Args:
        image: input image, or ``None`` to process *video* instead.
        video: path to a video file; only used when *image* is ``None``.
        model_id: one of ``MODEL_IDS``.
        image_size: value forwarded to the model as ``imgsz``.
        conf_threshold: value forwarded to the model as ``conf``.

    Returns:
        ``(annotated_image, None)`` for image input, or
        ``(None, output_video_path)`` for video input.

    Raises:
        ValueError: if *model_id* is unknown.
        gr.Error: if neither input is given or the video cannot be opened.
    """
    if image is None and video is None:
        raise gr.Error("Please provide an image or a video.")

    model = _load_model(model_id)

    if image is not None:
        results = model.predict(
            source=image, imgsz=image_size, conf=conf_threshold, device="cpu"
        )
        annotated_image = results[0].plot()
        # Reverse the channel axis (plot() output is presumably BGR - see the
        # Ultralytics docs) so the image displays with correct colours.
        return annotated_image[:, :, ::-1], None

    return None, _annotate_video(model, video, image_size, conf_threshold)


def inference_for_examples(image, model_path, image_size, conf_threshold):
    """Image-only wrapper around ``inference`` used by ``gr.Examples``."""
    annotated_image, _ = inference(
        image, None, model_path, image_size, conf_threshold
    )
    return annotated_image


def app():
    """Build the detection UI: inputs, outputs, event wiring and examples."""
    with gr.Blocks():
        with gr.Row():
            with gr.Column():
                image = gr.Image(type="pil", label="Image", visible=True)
                video = gr.Video(label="Video", visible=False)
                input_type = gr.Radio(
                    choices=["Image", "Video"],
                    value="Image",
                    label="Input Type",
                )
                model_id = gr.Dropdown(
                    label="Model",
                    choices=list(MODEL_IDS),
                    value="yolov10n-obb",
                )
                image_size = gr.Slider(
                    label="Image Size",
                    minimum=320,
                    maximum=1280,
                    step=32,
                    value=640,
                )
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.25,
                )
                inferBtn = gr.Button(value="Detect")

            with gr.Column():
                output_image = gr.Image(
                    type="numpy", label="Annotated Image", visible=True
                )
                output_video = gr.Video(label="Annotated Video", visible=False)

        def update_visibility(input_type):
            """Show the image widgets or the video widgets, never both."""
            show_image = input_type == "Image"
            return (
                gr.update(visible=show_image),
                gr.update(visible=not show_image),
                gr.update(visible=show_image),
                gr.update(visible=not show_image),
            )

        input_type.change(
            fn=update_visibility,
            inputs=[input_type],
            outputs=[image, video, output_image, output_video],
        )

        def run_inference(
            image, video, model_id, image_size, conf_threshold, input_type
        ):
            """Dispatch to ``inference`` using only the selected input kind."""
            if input_type == "Image":
                return inference(
                    image, None, model_id, image_size, conf_threshold
                )
            return inference(None, video, model_id, image_size, conf_threshold)

        inferBtn.click(
            fn=run_inference,
            inputs=[
                image,
                video,
                model_id,
                image_size,
                conf_threshold,
                input_type,
            ],
            outputs=[output_image, output_video],
        )

        gr.Examples(
            examples=[
                ["test_images/P0024.jpg", "yolov10n-obb", 1024, 0.25],
                ["test_images/P0035.jpg", "yolov10n-obb", 1024, 0.25],
                ["test_images/P0121.jpg", "yolov10n-obb", 1024, 0.25],
                ["test_images/P0180.jpg", "yolov10n-obb", 1024, 0.25],
                ["test_images/P0279.jpg", "yolov10n-obb", 1024, 0.25],
                ["test_images/P2112.jpg", "yolov10n-obb", 1024, 0.25],
            ],
            fn=inference_for_examples,
            inputs=[
                image,
                model_id,
                image_size,
                conf_threshold,
            ],
            outputs=[output_image],
            cache_examples='lazy',
        )


gradio_app = gr.Blocks()
with gradio_app:
    gr.Markdown(_HEADER_MARKDOWN)
    with gr.Row():
        with gr.Column():
            app()

if __name__ == '__main__':
    gradio_app.queue()
    gradio_app.launch()