Spaces:

NandiniLokeshReddy
/

QwenBaseModel

Build error

App Files Files Community

QwenBaseModel / app.py

NandiniLokeshReddy

Update app.py

761cff5 verified 10 months ago

raw

history blame contribute delete

1.98 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from PIL import Image
	import warnings

	# Suppress warnings
	warnings.filterwarnings('ignore')

	# Prefer the GPU, but fall back to the CPU so the app can still start on
	# hardware without CUDA instead of crashing at import time.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'
	torch.set_default_device(device)

	# Half precision is only worthwhile on the GPU; use float32 on the CPU,
	# where float16 kernels are slow or missing for some operations.
	model_dtype = torch.float16 if device == 'cuda' else torch.float32

	# Load the model and tokenizer
	model_name = 'qnguyen3/nanoLLaVA-1.5'
	try:
	    # trust_remote_code=True executes Python shipped in the model repo;
	    # NOTE(review): only keep this for repositories you trust.
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        torch_dtype=model_dtype,
	        device_map='auto',
	        trust_remote_code=True,
	    )
	    tokenizer = AutoTokenizer.from_pretrained(
	        model_name,
	        trust_remote_code=True,
	    )
	except ImportError:
	    print("Error: Missing required dependencies. Make sure flash_attn is installed.")
	    # Bare raise re-raises the active exception with its traceback intact.
	    raise

	# Function to describe the uploaded image
	def describe_image(image, prompt="Describe this image in detail"):
	    """Generate a text description of ``image`` guided by ``prompt``.

	    Args:
	        image: The uploaded picture, handed to ``model.process_images``.
	        prompt: Instruction placed after the image placeholder in the
	            user message.

	    Returns:
	        The decoded model reply, with special tokens removed and
	        surrounding whitespace stripped.

	    Raises:
	        ValueError: If ``image`` is ``None``.
	    """
	    if image is None:
	        # Presumably what the UI passes when the form is submitted without
	        # an upload; fail with a clear message instead of deep in the model.
	        raise ValueError("Please upload an image before requesting a description.")

	    messages = [{"role": "user", "content": f'<image>\n{prompt}'}]
	    text = tokenizer.apply_chat_template(
	        messages,
	        tokenize=False,
	        add_generation_prompt=True,
	    )

	    # Split only at the FIRST placeholder. Splitting on every occurrence
	    # would drop everything after a second '<image>' typed by the user,
	    # including the trailing generation prompt.
	    before_image, after_image = (
	        tokenizer(chunk).input_ids for chunk in text.split('<image>', 1)
	    )

	    # -200 stands in for the image between the two text chunks.
	    # NOTE(review): presumably the placeholder id that the model's remote
	    # code swaps for image features - confirm against the model card.
	    input_ids = torch.tensor(
	        before_image + [-200] + after_image,
	        dtype=torch.long,
	        device=model.device,
	    ).unsqueeze(0)

	    # Process the image and put it on the model's device and dtype
	    # explicitly rather than relying on the global default device.
	    image_tensor = model.process_images([image], model.config).to(
	        dtype=model.dtype,
	        device=model.device,
	    )

	    # Generate a response
	    output_ids = model.generate(
	        input_ids,
	        images=image_tensor,
	        max_new_tokens=2048,
	        use_cache=True,
	    )[0]

	    # Decode only the tokens past the prompt length and return the response
	    prompt_length = input_ids.shape[1]
	    return tokenizer.decode(
	        output_ids[prompt_length:],
	        skip_special_tokens=True,
	    ).strip()

	# Set up the Gradio interface.
	# Components are taken from the top-level namespace (gr.Image, gr.Textbox
	# with value=...): the older gr.inputs.* namespace and its default= keyword
	# are no longer available in current Gradio releases.
	demo = gr.Interface(
	    fn=describe_image,
	    inputs=[
	        gr.Image(type="pil"),
	        gr.Textbox(value="Describe this image in detail"),
	    ],
	    outputs="text",
	    title="Image Description Model",
	    description="Upload an image and receive a detailed description.",
	)
	demo.launch()