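# NOTE: the lines below appear to be file-viewer page residue captured with
# the source (not program code); kept as comments so the module parses.
# captain-awesome's picture
# Update app.py
# 25a3c0f verified
# raw
# history blame
# 1.72 kB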
from transformers import DetrImageProcessor, DetrForObjectDetection
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from PIL import Image
import requests
import gradio as gr
# Checkpoint identifiers for the two stages used by predict_bounding_boxes.
_DETR_CHECKPOINT = "facebook/detr-resnet-50"
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"

# Object-detection stage. Built once at import time so that every call to
# predict_bounding_boxes reuses the same processor/model pair.
box_processor = DetrImageProcessor.from_pretrained(_DETR_CHECKPOINT)
box_model = DetrForObjectDetection.from_pretrained(_DETR_CHECKPOINT)

# Image-captioning stage, likewise shared across calls.
caption_processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
caption_model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
# Upper bound, in seconds, on how long the image download may stall before
# the request is abandoned (without it a slow host blocks the worker forever).
_REQUEST_TIMEOUT_S = 15
# Minimum detection score for a box to be included in the response.
_DETECTION_THRESHOLD = 0.70


def predict_bounding_boxes(imageurl: str) -> dict:
    """Download the image at *imageurl*, detect objects in it and caption it.

    Args:
        imageurl: URL of the image to analyse.

    Returns:
        On success, a dict with two keys:
          * ``"image label"`` - the caption decoded from ``caption_model``.
          * ``"detections"`` - a list of dicts, one per detection at or above
            the score threshold, each with ``"score"`` (float), ``"label"``
            (name looked up in ``box_model.config.id2label``) and ``"box"``
            (list of box coordinates as returned by the post-processor).
        On any failure, ``{"error": <message>}``. The function never raises,
        so the caller always receives a JSON-serialisable dict.
    """
    # A blank or non-string value can never be a valid URL; reject it up front
    # with a clear message instead of surfacing an opaque library error.
    if not isinstance(imageurl, str) or not imageurl.strip():
        return {"error": "imageurl must be a non-empty URL string"}

    try:
        # Local import keeps this block self-contained (stdlib only).
        from io import BytesIO

        # NOTE(security): this fetches an arbitrary caller-supplied URL, so
        # the host running it can be used to reach internal addresses (SSRF).
        # Restrict the allowed hosts if this is deployed on a private network.
        with requests.get(imageurl.strip(), timeout=_REQUEST_TIMEOUT_S) as response:
            response.raise_for_status()
            # `.content` is fully read and content-decoded (gzip/deflate),
            # unlike `.raw`, and lets the connection be released right here.
            payload = response.content

        # Normalise to 3-channel RGB once, so both models receive the same
        # input and RGBA / palette / greyscale images do not trip the
        # detection processor.
        image = Image.open(BytesIO(payload)).convert("RGB")

        # --- Object detection -------------------------------------------
        detection_inputs = box_processor(images=image, return_tensors="pt")
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            detection_outputs = box_model(**detection_inputs)

        # PIL reports (width, height); the post-processor is given the
        # reversed pair so boxes are scaled to the original image.
        target_sizes = torch.tensor([image.size[::-1]])
        results = box_processor.post_process_object_detection(
            detection_outputs,
            target_sizes=target_sizes,
            threshold=_DETECTION_THRESHOLD,
        )[0]

        id2label = box_model.config.id2label
        detections = [
            {
                "score": score.item(),
                "label": id2label[class_id.item()],
                "box": box.tolist(),
            }
            for score, class_id, box in zip(
                results["scores"], results["labels"], results["boxes"]
            )
        ]

        # --- Captioning ---------------------------------------------------
        caption_inputs = caption_processor(image, return_tensors="pt")
        generated = caption_model.generate(**caption_inputs)
        caption = caption_processor.decode(generated[0], skip_special_tokens=True)

        return {"image label": caption, "detections": detections}
    except Exception as exc:
        # Deliberate catch-all at the API boundary: network, decoding and
        # model errors are all reported to the caller as an error payload.
        return {"error": str(exc)}
# Wrap the predictor in a Gradio interface: one text input (the image URL)
# and a JSON view of the dict that predict_bounding_boxes returns.
app = gr.Interface(
    fn=predict_bounding_boxes,
    inputs="text",
    outputs="json",
)
# NOTE(review): this only sets an `api` attribute on the Interface object;
# confirm the installed Gradio version actually reads it.
app.api = True
# Start serving; runs as soon as this module is executed.
app.launch()