import torch from transformers import pipeline from PIL import Image import cv2 import numpy as np from random import choice import io detector50 = pipeline(model="facebook/detr-resnet-50") detector101 = pipeline(model="facebook/detr-resnet-101") import gradio as gr COLORS = ["#ff7f7f", "#ff7fbf", "#ff7fff", "#bf7fff", "#7f7fff", "#7fbfff", "#7fffff", "#7fffbf", "#7fff7f", "#bfff7f", "#ffff7f", "#ffbf7f"] fdic = { "family" : "Impact", "style" : "italic", "size" : 15, "color" : "yellow", "weight" : "bold" } def get_figure(in_pil_img, in_results): # Convert PIL image to OpenCV format img_cv2 = np.array(in_pil_img) img_cv2 = cv2.cvtColor(img_cv2, cv2.COLOR_RGB2BGR) for prediction in in_results: selected_color = choice(COLORS) color = tuple(int(selected_color[i:i+2], 16) for i in (1, 3, 5)) # Convert hex color to RGB tuple x, y = prediction['box']['xmin'], prediction['box']['ymin'] w, h = prediction['box']['xmax'] - prediction['box']['xmin'], prediction['box']['ymax'] - prediction['box']['ymin'] # Draw bounding box using OpenCV img_cv2 = cv2.rectangle(img_cv2, (x, y), (x+w, y+h), color, 2) text = f"{prediction['label']}: {round(prediction['score']*100, 1)}%" img_cv2 = cv2.putText(img_cv2, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2) # Convert back to PIL format img_pil = Image.fromarray(cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)) return img_pil def infer(model, in_pil_img): results = None if model == "detr-resnet-101": results = detector101(in_pil_img) else: results = detector50(in_pil_img) output_pil_img = get_figure(in_pil_img, results) output_pil_img.save("output.jpg") return output_pil_img with gr.Blocks(title="DETR Object Detection using openCV", css=".gradio-container {background:lightyellow;}" ) as demo: #sample_index = gr.State([]) gr.HTML("""