mrcuddle committed on
Commit
941f385
·
verified ·
1 Parent(s): bec473c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -17
app.py CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
2
  from transformers import AutoProcessor, LlavaForConditionalGeneration
3
  from PIL import Image
4
  import torch
5
- import numpy as np
6
  import spaces
7
 
8
  # Load the Llava model and processor
@@ -15,28 +14,15 @@ def generate_text(input_text="", image=None):
15
  if image is None:
16
  return "Please upload an image."
17
 
18
- # Convert the uploaded image to a NumPy array and then to PIL format
19
- image_np = np.array(image)
20
- image_pil = Image.fromarray(image_np.astype('uint8'), 'RGB')
21
-
22
- # Resize the image to the expected resolution (336 x 336)
23
- image_pil = image_pil.resize((336, 336))
24
 
25
  # Use a default prompt if no text is provided
26
  if not input_text:
27
  input_text = "Describe the image."
28
 
29
  # Prepare inputs
30
- inputs = processor(text=input_text, images=image_pil, return_tensors="pt").to("cuda")
31
-
32
- # Debug: Print the keys and types of the inputs dictionary
33
- print("Processor output keys:", inputs.keys())
34
- for key, value in inputs.items():
35
- print(f"{key}: {type(value)}")
36
-
37
- # Check if image tokens are generated
38
- if 'input_ids' not in inputs or inputs['input_ids'].numel() == 0:
39
- return "Error: No image tokens generated. Please check the image and try again."
40
 
41
  # Generate output
42
  outputs = model.generate(**inputs)
 
2
  from transformers import AutoProcessor, LlavaForConditionalGeneration
3
  from PIL import Image
4
  import torch
 
5
  import spaces
6
 
7
  # Load the Llava model and processor
 
14
  if image is None:
15
  return "Please upload an image."
16
 
17
+ # Resize the image to the expected resolution (adjust size if necessary)
18
+ image = image.resize((336, 336))
 
 
 
 
19
 
20
  # Use a default prompt if no text is provided
21
  if not input_text:
22
  input_text = "Describe the image."
23
 
24
  # Prepare inputs
25
+ inputs = processor(text=input_text, images=image, return_tensors="pt").to("cuda")
 
 
 
 
 
 
 
 
 
26
 
27
  # Generate output
28
  outputs = model.generate(**inputs)