import functools

import gradio as gr
import torch
from transformers import (
    AutoImageProcessor,
    ResNetForImageClassification,
    YolosFeatureExtractor,
    YolosForObjectDetection,
)

CLASSIFIER_CHECKPOINT = "microsoft/resnet-50"
DETECTOR_CHECKPOINT = "hustvl/yolos-small"


@functools.lru_cache(maxsize=None)
def _load_classifier():
    """Return the (processor, model) pair for the ResNet classifier, loaded once."""
    processor = AutoImageProcessor.from_pretrained(CLASSIFIER_CHECKPOINT)
    model = ResNetForImageClassification.from_pretrained(CLASSIFIER_CHECKPOINT)
    return processor, model


@functools.lru_cache(maxsize=None)
def _load_detector():
    """Return the (feature_extractor, model) pair for the YOLOS detector, loaded once."""
    feature_extractor = YolosFeatureExtractor.from_pretrained(DETECTOR_CHECKPOINT)
    model = YolosForObjectDetection.from_pretrained(DETECTOR_CHECKPOINT)
    return feature_extractor, model


def detect(image1, image2):
    """Classify the object in ``image1`` and run object detection on ``image2``.

    Args:
        image1: Image of the object to identify; passed to the ResNet-50
            image processor.
        image2: Image to search; passed to the YOLOS feature extractor.

    Returns:
        A ``(label, bboxes)`` tuple. ``label`` is the classifier's top-1 class
        name for ``image1``. ``bboxes`` is the detector's raw ``pred_boxes``
        tensor for ``image2``; no confidence filtering or matching against
        ``label`` is applied here.
    """
    ### Image 1, the object ###
    processor, classifier = _load_classifier()
    classifier_inputs = processor(image1, return_tensors="pt")

    with torch.no_grad():
        class_logits = classifier(**classifier_inputs).logits

    # The classifier predicts one of the 1000 ImageNet classes. Resolve the
    # name through the classifier's own config: the detector uses a different
    # (COCO) label table, so its id2label must not be used for this index.
    predicted_label = class_logits.argmax(-1).item()
    label = classifier.config.id2label[predicted_label]
    print(label)

    ### Image 2, object detections ###
    feature_extractor, detector = _load_detector()
    detector_inputs = feature_extractor(images=image2, return_tensors="pt")

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = detector(**detector_inputs)

    # The detector predicts bounding boxes and corresponding COCO classes.
    bboxes = outputs.pred_boxes

    return label, bboxes


demo = gr.Interface(
    fn=detect,
    inputs=[
        gr.Image(label="Object to detect"),
        gr.Image(label="Image to detect object in"),
    ],
    # Output entries must be real components; the names the original passed as
    # (invalid) shortcut strings are kept as the component labels instead.
    outputs=[
        gr.Textbox(label="prediction"),
        gr.Textbox(label="bounding boxes"),
    ],
    title="Object Counts in Image",
)

if __name__ == "__main__":
    demo.launch()