IDEFICS3_ROCO / app.py
æLtorio
zerogpu initialization
ebf3ae4 unverified
raw
history blame
1.28 kB
import gradio as gr
from transformers import AutoProcessor, Idefics3ForConditionalGeneration, image_utils
import torch
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Adapter repository and the base checkpoint the adapter is attached to.
model_id = "eltorio/IDEFICS3_ROCO"
base_model_path = "HuggingFaceM4/Idefics3-8B-Llama3"  # or change to local path

# Earlier variant, kept for reference:
# model = AutoModelForImageTextToText.from_pretrained(model_id).to(device)

# The processor is taken from the base checkpoint.
processor = AutoProcessor.from_pretrained(base_model_path)

# Load the base weights in bfloat16, move them to the selected device,
# then attach the adapter weights on top.
model = Idefics3ForConditionalGeneration.from_pretrained(
    base_model_path,
    torch_dtype=torch.bfloat16,
)
model = model.to(device)
model.load_adapter(model_id)
def infere(image):
    """Answer "What do we see in this image?" for a single image.

    Builds a one-turn chat prompt (one image placeholder plus the fixed
    question), runs it through the module-level ``processor`` and ``model``
    on ``device``, and decodes the generated token ids.

    Args:
        image: The image supplied by the Gradio ``"image"`` input.
            NOTE(review): presumably a numpy array, or ``None`` when the
            form is submitted empty -- confirm against the Gradio version
            in use.

    Returns:
        str: The decoded text of the single generated sequence. A plain
        string is returned (not the one-element list produced by
        ``batch_decode``) because the result feeds a ``"text"`` output,
        which would otherwise display the list's repr.

    Raises:
        gr.Error: If no image was provided.
    """
    # Fail with a readable UI message instead of an opaque processor error.
    if image is None:
        raise gr.Error("Please provide an image.")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "What do we see in this image?"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt")
    # Move every tensor produced by the processor to the model's device.
    inputs = {k: v.to(device) for k, v in inputs.items()}
    generated_ids = model.generate(**inputs, max_new_tokens=8192)
    generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
    # One prompt in, one sequence out: unwrap it for the text component.
    # NOTE(review): the decoded text presumably still contains the prompt
    # turn ahead of the answer -- confirm whether it should be trimmed.
    return generated_texts[0]
# Expose `infere` through a minimal image-in / text-out web UI and serve it.
demo = gr.Interface(
    fn=infere,
    inputs="image",
    outputs="text",
)
demo.launch()