import gradio as gr
from transformers import AutoImageProcessor, AutoModelForObjectDetection
from PIL import Image, ImageDraw
import torch

# Hugging Face Hub checkpoint shared by the preprocessor and the detector, so
# both are always loaded from the same model.
MODEL_CHECKPOINT = 'hustvl/yolos-small'

# Loaded once at import time and reused by every call to detect().
image_processor = AutoImageProcessor.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForObjectDetection.from_pretrained(MODEL_CHECKPOINT)

def detect(image):
    """Detect objects in an image and draw a labelled box around each one.

    Args:
        image: A PIL image (the Gradio input is configured with type="pil").

    Returns:
        A ``(results, annotated)`` tuple. ``results`` is the per-image dict
        returned by ``post_process_object_detection`` (keys ``"scores"``,
        ``"labels"`` and ``"boxes"``); ``annotated`` is a copy of ``image``
        with a red rectangle and the class name drawn for every detection.

    Raises:
        ValueError: If ``image`` is None (e.g. nothing was uploaded).
    """
    if image is None:
        raise ValueError("No input image was provided.")

    inputs = image_processor(images=image, return_tensors="pt")
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    # Rescale the predictions to the original image. PIL's size is
    # (width, height) while the post-processor expects (height, width).
    target_sizes = torch.tensor([image.size[::-1]])
    results = image_processor.post_process_object_detection(
        outputs,
        threshold=0.9,
        target_sizes=target_sizes,
    )[0]

    # Draw on a copy so the caller's image is left untouched.
    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)

    # Number of detections per class name.
    # NOTE(review): this tally is not returned yet; given the app title
    # ("Object Counts in Image") it probably should replace `results` in the
    # text output -- confirm before changing the return value.
    counts = {}

    for label, box in zip(results["labels"], results["boxes"]):
        label_name = model.config.id2label[label.item()]
        counts[label_name] = counts.get(label_name, 0) + 1

        # Post-processed boxes are absolute corner coordinates
        # (x_min, y_min, x_max, y_max), NOT COCO-style (x, y, width, height),
        # so they can be passed to rectangle() directly.
        x_min, y_min, x_max, y_max = (round(v, 2) for v in box.tolist())
        draw.rectangle((x_min, y_min, x_max, y_max), outline="red", width=1)
        draw.text((x_min, y_min), label_name, fill="white")

    return results, annotated

# Gradio UI: one PIL image in; the detection results (rendered as text) and
# the annotated image out.
demo = gr.Interface(
    fn=detect,
    # gr.inputs.* is the deprecated pre-3.0 namespace (removed in Gradio 4);
    # the top-level component accepts the same label/type arguments.
    inputs=[gr.Image(label="Input image", type="pil")],
    # TODO: add gr.Label(num_top_classes=10) once detect() returns class counts.
    outputs=["text", "image"],
    title="Object Counts in Image",
)

demo.launch()