|
import os
import tempfile
from io import BytesIO

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
# Qwen-VL ships custom tokenizer/model code on the Hugging Face Hub,
# hence trust_remote_code=True on both loads.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL", trust_remote_code=True)

# Fall back to CPU when no GPU is available so the demo still starts
# (slowly) on CPU-only hosts; identical to the old behavior on CUDA machines.
_device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL", device_map=_device, trust_remote_code=True).eval()
|
|
|
def generate_predictions(image_input, text_input):
    """Run Qwen-VL on an uploaded image plus a text prompt.

    Args:
        image_input: PIL image from the Gradio image widget
            (assumed PIL, since the original code calls .save() — TODO confirm
            the widget is configured with type="pil").
        text_input: user prompt string.

    Returns:
        Tuple of (image, response_text): the image annotated with any
        bounding boxes the model emitted (or the original image if none),
        and the raw decoded model output including special tokens.
    """
    # Qwen-VL's prompt format references images by file path, so the upload
    # must be persisted to disk first. Use the platform temp dir rather than
    # a hard-coded /tmp so this also works on Windows.
    user_image_path = os.path.join(tempfile.gettempdir(), "user_input_test_image.jpg")
    # JPEG cannot store an alpha channel; convert defensively so RGBA/PNG
    # uploads do not make .save() raise.
    image_input.convert("RGB").save(user_image_path)

    # Build the interleaved image+text prompt in Qwen-VL's expected format.
    query = tokenizer.from_list_format([
        {'image': user_image_path},
        {'text': text_input},
    ])
    inputs = tokenizer(query, return_tensors='pt').to(model.device)

    # Inference only — disable autograd to avoid building a useless graph.
    with torch.no_grad():
        pred = model.generate(**inputs)

    # Keep special tokens: the <box>...</box> markers are what
    # draw_bbox_on_latest_picture parses to locate the boxes.
    response = tokenizer.decode(pred.cpu()[0], skip_special_tokens=False)

    # draw_bbox_on_latest_picture returns None when the response contains no
    # box annotations; fall back to the original image so the Gradio image
    # output always receives a valid picture.
    image_with_boxes = tokenizer.draw_bbox_on_latest_picture(response)
    if image_with_boxes is None:
        image_with_boxes = Image.open(user_image_path)

    return image_with_boxes, response
|
|
|
|
|
# Wire the predictor into a simple web UI: an image and a text prompt in,
# the (possibly box-annotated) image and the raw model response out.
demo_inputs = ["image", "text"]
demo_outputs = ["image", "text"]

iface = gr.Interface(fn=generate_predictions, inputs=demo_inputs, outputs=demo_outputs)

iface.launch()
|
|