# FT_Llama / app.py
# Author: 0llheaven
# Latest commit: "Update app.py" (e094577, verified)
# File size: 1.89 kB
import gradio as gr
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor
from transformers import TextStreamer
from torchvision.transforms import Resize
# Checkpoint to serve, and the device to place it on: the GPU when torch can
# see one, otherwise the CPU.
model_id = "0llheaven/Llama-3.2-11B-Vision-Radiology-mini"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the vision-language model, requesting 4-bit loading with bfloat16 as
# the torch dtype, mapped onto the device selected above.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    device_map=device,
    torch_dtype=torch.bfloat16,
    load_in_4bit=True,
)
model.gradient_checkpointing_enable()

# The processor is loaded from the same checkpoint as the model.
processor = AutoProcessor.from_pretrained(model_id)
# Prompt used when the caller does not supply an instruction of their own.
DEFAULT_INSTRUCTION = (
    "You are an expert radiographer. "
    "Describe accurately what you see in this image."
)


def generate_description(
    image: "Image.Image",
    instruction: str = DEFAULT_INSTRUCTION,
) -> str:
    """Generate a textual description of an image with the loaded model.

    Args:
        image: The PIL image to describe. ``None`` is tolerated (the UI may
            call the function with no image); an empty string is returned.
        instruction: Prompt text sent to the model together with the image.
            ``None``, empty or whitespace-only values fall back to
            ``DEFAULT_INSTRUCTION``.

    Returns:
        The newly generated text only, with the echoed prompt and special
        tokens removed, or ``""`` when no image was supplied.
    """
    # Nothing to describe: return early instead of failing on None.convert().
    if image is None:
        return ""

    # Honour the caller's instruction; only fall back when it is blank.
    prompt = (instruction or "").strip() or DEFAULT_INSTRUCTION

    image = image.convert("RGB")

    # Single user turn holding an image placeholder followed by the prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt},
            ],
        }
    ]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    # Generate the output from the model.
    output = model.generate(**inputs, max_new_tokens=256)

    # The generated sequence starts with the prompt tokens; drop them so that
    # only the model's answer is decoded and shown to the user.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = output[0][prompt_length:]
    return processor.decode(generated_tokens, skip_special_tokens=True).strip()
# Define Gradio interface.
# One input component per parameter of generate_description (image, then
# instruction), matching the description text shown to the user.
# Live mode is deliberately not enabled: it would start a new generation on
# every change to an input instead of waiting for an explicit submit.
interface = gr.Interface(
    fn=generate_description,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Textbox(
            label="Instruction",
            value="You are an expert radiographer. Describe accurately what you see in this image.",
        ),
    ],
    outputs=gr.Textbox(label="Generated Description"),
    title="Radiology Image Description Generator",
    description="Upload an image and provide an instruction to generate a description using a vision-language model.",
)

# Launch the interface
interface.launch()