Update app.py
app.py CHANGED
@@ -5,7 +5,7 @@ import torch
 import numpy as np
 import spaces
 
-# Load the
+# Load the Llava model and processor
 model_id = "mrcuddle/lumimaid-v0.2-8b-pixtral"
 processor = AutoProcessor.from_pretrained(model_id)
 model = LlavaForConditionalGeneration.from_pretrained(model_id, ignore_mismatched_sizes=True).to("cuda")
@@ -19,6 +19,9 @@ def generate_text(input_text="", image=None):
     image_np = np.array(image)
     image_pil = Image.fromarray(image_np.astype('uint8'), 'RGB')
 
+    # Resize the image to the expected resolution (336 x 336)
+    image_pil = image_pil.resize((336, 336))
+
     # Use a default prompt if no text is provided
     if not input_text:
         input_text = "Describe the image."
@@ -26,10 +29,10 @@ def generate_text(input_text="", image=None):
     # Prepare inputs
     inputs = processor(text=input_text, images=image_pil, return_tensors="pt").to("cuda")
 
-    # Debug: Print the keys and
+    # Debug: Print the keys and types of the inputs dictionary
     print("Processor output keys:", inputs.keys())
     for key, value in inputs.items():
-        print(f"{key}: {value
+        print(f"{key}: {type(value)}")
 
     # Check if image tokens are generated
     if 'input_ids' not in inputs or inputs['input_ids'].numel() == 0:
@@ -46,8 +49,8 @@ iface = gr.Interface(
     fn=generate_text,
     inputs=[gr.Textbox(label="Enter your text here (optional)", value=""), gr.Image(label="Upload an image", type="pil")],
     outputs=gr.Textbox(label="Generated Text"),
-    title="
-    description="Interact with the
+    title="Llava Model Interaction",
+    description="Interact with the Llava model using text and image inputs. If no text is provided, the model will describe the image."
 )
 
 # Launch the interface