mrcuddle committed on
Commit
d071aca
·
verified ·
1 Parent(s): d7b210e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -1,20 +1,23 @@
1
  import gradio as gr
2
  from transformers import AutoProcessor, LlavaForConditionalGeneration
3
  from PIL import Image
4
- import requests
5
- from io import BytesIO
6
- import spaces
7
 
8
  # Load the Pixtral model and processor
9
  model_id = "mrcuddle/lumimaid-v0.2-8b-pixtral"
10
  processor = AutoProcessor.from_pretrained(model_id)
11
  model = LlavaForConditionalGeneration.from_pretrained(model_id, ignore_mismatched_sizes=True).to("cuda")
12
 
13
- @spaces.GPU
14
- def generate_text(input_text, image_url):
15
- # Load image
16
- response = requests.get(image_url)
17
- image = Image.open(BytesIO(response.content))
 
 
 
 
 
18
 
19
  # Prepare inputs
20
  inputs = processor(text=input_text, images=image, return_tensors="pt").to("cuda")
@@ -28,10 +31,10 @@ def generate_text(input_text, image_url):
28
  # Create Gradio interface
29
  iface = gr.Interface(
30
  fn=generate_text,
31
- inputs=[gr.Textbox(label="Enter your text here"), gr.Textbox(label="Enter image URL")],
32
  outputs=gr.Textbox(label="Generated Text"),
33
  title="Pixtral Model Interaction",
34
- description="Interact with the Pixtral model using text and image inputs."
35
  )
36
 
37
  # Launch the interface
 
1
  import gradio as gr
2
  from transformers import AutoProcessor, LlavaForConditionalGeneration
3
  from PIL import Image
4
+ import torch
 
 
5
 
6
  # Load the Pixtral model and processor
7
  model_id = "mrcuddle/lumimaid-v0.2-8b-pixtral"
8
  processor = AutoProcessor.from_pretrained(model_id)
9
  model = LlavaForConditionalGeneration.from_pretrained(model_id, ignore_mismatched_sizes=True).to("cuda")
10
 
11
+ def generate_text(input_text="", image=None):
12
+ if image is None:
13
+ return "Please upload an image."
14
+
15
+ # Convert the uploaded image to PIL format
16
+ image = Image.fromarray(image.astype('uint8'), 'RGB')
17
+
18
+ # Use a default prompt if no text is provided
19
+ if not input_text:
20
+ input_text = "Describe the image."
21
 
22
  # Prepare inputs
23
  inputs = processor(text=input_text, images=image, return_tensors="pt").to("cuda")
 
31
  # Create Gradio interface
32
  iface = gr.Interface(
33
  fn=generate_text,
34
+ inputs=[gr.Textbox(label="Enter your text here (optional)", default=""), gr.Image(label="Upload an image", type="pil")],
35
  outputs=gr.Textbox(label="Generated Text"),
36
  title="Pixtral Model Interaction",
37
+ description="Interact with the Pixtral model using text and image inputs. If no text is provided, the model will describe the image."
38
  )
39
 
40
  # Launch the interface