# Qwen-VL Gradio demo (Hugging Face Space): image + text prompt -> caption / grounding with bounding boxes.
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from PIL import Image
import requests
from io import BytesIO

# Load the Qwen-VL tokenizer and model once at startup.
# trust_remote_code=True is required because Qwen-VL ships custom modeling/tokenizer
# code in its repo; NOTE(review): this executes repo code — only use with trusted repos.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-VL",
    # Fall back to CPU when no GPU is present so the Space still builds/runs
    # (device_map="cuda" raises on CPU-only hosts).
    device_map="cuda" if torch.cuda.is_available() else "cpu",
    trust_remote_code=True,
).eval()
def generate_predictions(image_input, text_input):
    """Run Qwen-VL on an uploaded image plus a text prompt.

    Args:
        image_input: PIL image from the Gradio image component.
        text_input: user prompt string.

    Returns:
        (image, response): the image annotated with any bounding boxes the model
        emitted (or the original image when there are none), and the raw decoded
        model response.
    """
    # Persist the upload so the tokenizer can reference the image by path,
    # which is the format Qwen-VL's from_list_format expects.
    user_image_path = "/tmp/user_input_test_image.jpg"
    image_input.save(user_image_path)

    # Build the multimodal query in Qwen-VL's list format.
    query = tokenizer.from_list_format([
        {'image': user_image_path},
        {'text': text_input},
    ])
    inputs = tokenizer(query, return_tensors='pt')
    inputs = inputs.to(model.device)

    # Generate; keep special tokens because the <box>...</box> markers are
    # needed for bounding-box extraction below.
    pred = model.generate(**inputs)
    response = tokenizer.decode(pred.cpu()[0], skip_special_tokens=False)

    # draw_bbox_on_latest_picture returns None when the response contains no
    # boxes; fall back to the original image so Gradio never receives None.
    image_with_boxes = tokenizer.draw_bbox_on_latest_picture(response)
    if image_with_boxes is None:
        image_with_boxes = Image.open(user_image_path)
    return image_with_boxes, response
# Wire the prediction function into a two-input / two-output Gradio UI.
# type="pil" is required: the "image" shorthand defaults to a numpy array,
# but generate_predictions calls .save(), a PIL.Image method.
iface = gr.Interface(
    fn=generate_predictions,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Textbox(label="Prompt"),
    ],
    outputs=[
        gr.Image(label="Image with Boxes"),
        gr.Textbox(label="Model Response"),
    ],
)
iface.launch()