sagar007 committed on
Commit
7d4688b
·
verified ·
1 Parent(s): bb4d7fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -110,8 +110,12 @@ def process_image_input(image, vision_model, processor):
110
  return "Error: Vision model is not available."
111
 
112
  try:
 
 
 
 
 
113
  # Process the image
114
- image = Image.open(io.BytesIO(image)).convert('RGB')
115
  inputs = processor(images=image, return_tensors="pt").to(vision_model.device)
116
 
117
  # Generate text
@@ -129,6 +133,7 @@ def process_image_input(image, vision_model, processor):
129
  return generated_text
130
  except Exception as e:
131
  return f"Error processing image: {str(e)}"
 
132
  # Generate response within a GPU-decorated function
133
  @spaces.GPU
134
  def generate_response(transcription, sarvam_pipe):
@@ -192,6 +197,7 @@ def indic_vision_assistant(input_type, audio_input, text_input, image_input):
192
  elif input_type == "text" and text_input:
193
  transcription = text_input
194
  elif input_type == "image" and image_input is not None:
 
195
  transcription = process_image_input(image_input, vision_model, processor)
196
  else:
197
  return "Please provide either audio, text, or image input.", "No input provided.", None
 
110
  return "Error: Vision model is not available."
111
 
112
  try:
113
+ # Check if image is already a PIL Image
114
+ if not isinstance(image, Image.Image):
115
+ # If it's not, assume it's a file path or file-like object and open it (raw bytes would need io.BytesIO)
116
+ image = Image.open(image).convert('RGB')
117
+
118
  # Process the image
 
119
  inputs = processor(images=image, return_tensors="pt").to(vision_model.device)
120
 
121
  # Generate text
 
133
  return generated_text
134
  except Exception as e:
135
  return f"Error processing image: {str(e)}"
136
+
137
  # Generate response within a GPU-decorated function
138
  @spaces.GPU
139
  def generate_response(transcription, sarvam_pipe):
 
197
  elif input_type == "text" and text_input:
198
  transcription = text_input
199
  elif input_type == "image" and image_input is not None:
200
+ # Directly pass the image_input to process_image_input
201
  transcription = process_image_input(image_input, vision_model, processor)
202
  else:
203
  return "Please provide either audio, text, or image input.", "No input provided.", None