Spaces:

mrcuddle
/

Lumimaid-Pixtral

Runtime error

File size: 1,462 Bytes

5dbeef5
bbc5484
5dbeef5
d071aca
82c21d0
6889bb8
bec473c
ab78f84
d7b210e
76c5275
bbc5484
2713103
 
5dbeef5
82c21d0
d071aca
 
 
 
941f385
 
bec473c
d071aca
 
 
5dbeef5
 
941f385
bd6f6b2
5dbeef5
 
bd6f6b2
5dbeef5
 
 
 
 
 
b5272ae
5dbeef5
bec473c
 
5dbeef5

import gradio as gr
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import torch
import spaces

# Checkpoint shared by the processor and the model weights
model_id = "mrcuddle/lumimaid-v0.2-8b-pixtral"

# The processor prepares the text/image inputs and decodes generated ids
processor = AutoProcessor.from_pretrained(model_id)

# Load the weights first, then move the whole model onto the GPU
model = LlavaForConditionalGeneration.from_pretrained(model_id)
model = model.to("cuda")

# Dump the resolved configuration to the startup log
print(model.config)


@spaces.GPU
def generate_text(input_text="", image=None, max_new_tokens=256):
    """Generate text from the model for an uploaded image and optional prompt.

    Args:
        input_text: Prompt to pair with the image. When empty or only
            whitespace, "Describe the image." is used instead.
        image: PIL image supplied by the UI, or None when nothing was
            uploaded.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The first decoded sequence with special tokens stripped, or a short
        message asking for an image when none was supplied.
    """
    if image is None:
        return "Please upload an image."

    # Resize the image to the expected resolution (adjust size if necessary)
    image = image.resize((336, 336))

    # Use a default prompt if no (or only blank) text is provided
    if not input_text or not input_text.strip():
        input_text = "Describe the image."

    # LLaVA-style processors expand an image placeholder found in the prompt;
    # without one the image features have nothing to attach to.
    # NOTE(review): assumes the processor exposes its placeholder as
    # `image_token`, falling back to "<image>" -- confirm for this checkpoint.
    image_token = getattr(processor, "image_token", None) or "<image>"
    if image_token not in input_text:
        input_text = f"{image_token}\n{input_text}"

    # Prepare inputs
    inputs = processor(text=input_text, images=image, return_tensors="pt").to("cuda")

    # Generate output with an explicit token budget instead of relying on the
    # library's default length limit.
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]

    return generated_text

# Build the UI components: a text prompt and an image in, text out
prompt_box = gr.Textbox(label="Enter your text here (optional)", value="")
image_box = gr.Image(label="Upload an image", type="pil")
result_box = gr.Textbox(label="Generated Text")

# Wire the components to the generation function
iface = gr.Interface(
    fn=generate_text,
    inputs=[prompt_box, image_box],
    outputs=result_box,
    title="Llava Model Interaction",
    description=(
        "Interact with the Llava model using text and image inputs. "
        "If no text is provided, the model will describe the image."
    ),
)

# Start serving the app
iface.launch()