mrcuddle committed on
Commit
d071aca
·
verified ·
1 Parent(s): d7b210e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -1,20 +1,23 @@
1
  import gradio as gr
2
  from transformers import AutoProcessor, LlavaForConditionalGeneration
3
  from PIL import Image
4
- import requests
5
- from io import BytesIO
6
- import spaces
7
 
8
  # Load the Pixtral model and processor
9
  model_id = "mrcuddle/lumimaid-v0.2-8b-pixtral"
10
  processor = AutoProcessor.from_pretrained(model_id)
11
  model = LlavaForConditionalGeneration.from_pretrained(model_id, ignore_mismatched_sizes=True).to("cuda")
12
 
13
- @spaces.GPU
14
- def generate_text(input_text, image_url):
15
- # Load image
16
- response = requests.get(image_url)
17
- image = Image.open(BytesIO(response.content))
 
 
 
 
 
18
 
19
  # Prepare inputs
20
  inputs = processor(text=input_text, images=image, return_tensors="pt").to("cuda")
@@ -28,10 +31,10 @@ def generate_text(input_text, image_url):
28
  # Create Gradio interface
29
  iface = gr.Interface(
30
  fn=generate_text,
31
- inputs=[gr.Textbox(label="Enter your text here"), gr.Textbox(label="Enter image URL")],
32
  outputs=gr.Textbox(label="Generated Text"),
33
  title="Pixtral Model Interaction",
34
- description="Interact with the Pixtral model using text and image inputs."
35
  )
36
 
37
  # Launch the interface
 
1
  import gradio as gr
2
  from transformers import AutoProcessor, LlavaForConditionalGeneration
3
  from PIL import Image
4
+ import torch
 
 
5
 
6
  # Load the Pixtral model and processor
7
  model_id = "mrcuddle/lumimaid-v0.2-8b-pixtral"
8
  processor = AutoProcessor.from_pretrained(model_id)
9
  model = LlavaForConditionalGeneration.from_pretrained(model_id, ignore_mismatched_sizes=True).to("cuda")
10
 
11
+ def generate_text(input_text="", image=None):
12
+ if image is None:
13
+ return "Please upload an image."
14
+
15
+ # Convert the uploaded image to PIL format
16
+ image = Image.fromarray(image.astype('uint8'), 'RGB')
17
+
18
+ # Use a default prompt if no text is provided
19
+ if not input_text:
20
+ input_text = "Describe the image."
21
 
22
  # Prepare inputs
23
  inputs = processor(text=input_text, images=image, return_tensors="pt").to("cuda")
 
31
  # Create Gradio interface
32
  iface = gr.Interface(
33
  fn=generate_text,
34
+ inputs=[gr.Textbox(label="Enter your text here (optional)", default=""), gr.Image(label="Upload an image", type="pil")],
35
  outputs=gr.Textbox(label="Generated Text"),
36
  title="Pixtral Model Interaction",
37
+ description="Interact with the Pixtral model using text and image inputs. If no text is provided, the model will describe the image."
38
  )
39
 
40
  # Launch the interface