# Hugging Face Space: Gradio demo for a LLaVA-style (pixtral) vision-language model.
# NOTE: the original capture included the Spaces page header and its
# "Runtime error" status banner, which are not part of the program.
import gradio as gr
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import torch
import spaces

# Load the LLaVA-style model and its processor once at module import,
# so every Gradio request reuses the same weights.
model_id = "mrcuddle/lumimaid-v0.2-8b-pixtral"
processor = AutoProcessor.from_pretrained(model_id)

# Fall back to CPU when no GPU is available; a hard-coded .to("cuda")
# raises at startup on CPU-only hosts.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = LlavaForConditionalGeneration.from_pretrained(model_id).to(device)
print(model.config)
def generate_text(input_text="", image=None):
    """Generate a text response for an uploaded image and optional prompt.

    Args:
        input_text: Optional user prompt; a default description request is
            used when empty.
        image: A PIL image supplied by the Gradio `Image` component.

    Returns:
        The decoded model output, or an instruction string when no image
        was uploaded.
    """
    if image is None:
        return "Please upload an image."
    # NOTE(review): pixtral-based LLaVA variants usually handle variable
    # resolutions via the processor — TODO confirm this fixed 336x336
    # resize (which distorts aspect ratio) is actually required.
    image = image.resize((336, 336))
    # Use a default prompt if no text is provided.
    if not input_text:
        input_text = "Describe the image."
    # NOTE(review): LLaVA processors typically expect an image placeholder
    # token (e.g. "<image>") in the text — verify against this model's chat
    # template; its absence is a common cause of runtime errors.
    inputs = processor(text=input_text, images=image, return_tensors="pt").to(model.device)
    # inference_mode: no autograd bookkeeping during generation.
    # max_new_tokens: without it, generate() caps output at ~20 new tokens.
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=256)
    return processor.batch_decode(outputs, skip_special_tokens=True)[0]
# Create Gradio interface | |
# Wire the model into a simple two-input / one-output Gradio UI.
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Enter your text here (optional)", value=""),
        gr.Image(label="Upload an image", type="pil"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Llava Model Interaction",
    description="Interact with the Llava model using text and image inputs. If no text is provided, the model will describe the image."
)

# Launch the interface (blocks and serves the app).
iface.launch()