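"""Gradio app that generates radiology image descriptions using the
fine-tuned Llama-3.2-11B-Vision-Radiology-mini vision-language model."""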
import spaces
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor, TextStreamer
# Define the model and processor
model_id = "0llheaven/Llama-3.2-11B-Vision-Radiology-mini"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    # Full precision on CPU; bfloat16 on GPU to halve memory use.
    torch_dtype=torch.float32 if device.type == "cpu" else torch.bfloat16,
    # Let accelerate place the weights when a GPU is available; a model loaded
    # with device_map must not be moved again with .to().
    device_map="auto" if device.type == "cuda" else None,
)
if device.type == "cpu":
    model = model.to(device)
model.eval()

processor = AutoProcessor.from_pretrained(model_id)
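
# On Hugging Face ZeroGPU Spaces, @spaces.GPU requests a GPU for the duration
# of the call (here up to 120 seconds).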
# Process the uploaded image and generate a description.
@spaces.GPU(duration=120)
def generate_description(image: Image.Image, instruction: str):
    image = image.convert("RGB")

    # Fall back to the default radiology prompt when no instruction is given.
    if not instruction:
        instruction = "You are an expert radiographer. Describe accurately what you see in this image."

    # Create the message to pass to the model
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ]}
    ]
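
    # apply_chat_template renders the messages in the model's prompt format,
    # inserting the <|image|> placeholder for the uploaded image.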
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(device)
    # Stream tokens to stdout while generating, then decode the full output.
    text_streamer = TextStreamer(processor.tokenizer, skip_prompt=True)
    outputs = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=256,
        use_cache=True,
        do_sample=True,  # required for temperature/min_p to take effect
        temperature=1.5,
        min_p=0.1,
    )

    # Return only the newly generated tokens, excluding the echoed prompt.
    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return processor.decode(generated_tokens, skip_special_tokens=True).strip()
# Define Gradio interface
interface = gr.Interface(
    fn=generate_description,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(label="Instruction (optional)", placeholder="Leave blank to use the default radiology prompt"),
    ],
    outputs=gr.Textbox(label="Generated Description"),
    title="Radiology Image Description Generator",
    description="Upload an image and provide an instruction to generate a description using a vision-language model.",
)
# Launch the interface
interface.launch()