mknolan committed on
Commit
3f1523d
·
verified ·
1 Parent(s): ce32a95

Upload InternVL2 implementation

Browse files
Files changed (1) hide show
  1. app_internvl2.py +59 -2
app_internvl2.py CHANGED
@@ -197,9 +197,61 @@ def analyze_image(image, prompt):
197
  if internvl2_model is not None:
198
  try:
199
  print("Running inference with InternVL2...")
 
 
 
200
  response = internvl2_model((prompt, pil_image))
201
- result = response.text if hasattr(response, "text") else str(response)
202
- return f"[InternVL2] {result}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  except Exception as e:
204
  print(f"Error with InternVL2: {str(e)}")
205
  # If InternVL2 fails, fall back to BLIP if available
@@ -212,6 +264,11 @@ def analyze_image(image, prompt):
212
  inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
213
  out = blip_model.generate(**inputs, max_new_tokens=100)
214
  result = blip_processor.decode(out[0], skip_special_tokens=True)
 
 
 
 
 
215
  return f"[BLIP] {result} (Note: Custom prompts not supported with BLIP fallback model)"
216
  except Exception as e:
217
  print(f"Error with BLIP: {str(e)}")
 
197
  if internvl2_model is not None:
198
  try:
199
  print("Running inference with InternVL2...")
200
+ print(f"Using prompt: '{prompt}'")
201
+
202
+ # Run the model and capture the raw response
203
  response = internvl2_model((prompt, pil_image))
204
+
205
+ # Print debug info about the response
206
+ print(f"Response type: {type(response)}")
207
+ print(f"Response attributes: {dir(response) if hasattr(response, '__dir__') else 'No dir available'}")
208
+
209
+ # Try different ways to extract the text
210
+ if hasattr(response, "text"):
211
+ result = response.text
212
+ print(f"Found 'text' attribute: '{result}'")
213
+ elif hasattr(response, "response"):
214
+ result = response.response
215
+ print(f"Found 'response' attribute: '{result}'")
216
+ elif hasattr(response, "generated_text"):
217
+ result = response.generated_text
218
+ print(f"Found 'generated_text' attribute: '{result}'")
219
+ else:
220
+ # If no attribute worked, convert the whole response to string
221
+ result = str(response)
222
+ print(f"Using string conversion: '{result}'")
223
+
224
+ # Check if we got an empty result
225
+ if not result or result.strip() == "":
226
+ print("WARNING: Received empty response from InternVL2")
227
+ # Try an alternative prompt to see if that works
228
+ print("Trying alternative prompt...")
229
+ alt_prompt = "This is an image. Describe what you see in detail."
230
+ response2 = internvl2_model((alt_prompt, pil_image))
231
+
232
+ if hasattr(response2, "text"):
233
+ result = response2.text
234
+ elif hasattr(response2, "response"):
235
+ result = response2.response
236
+ elif hasattr(response2, "generated_text"):
237
+ result = response2.generated_text
238
+ else:
239
+ result = str(response2)
240
+
241
+ if not result or result.strip() == "":
242
+ print("Alternative prompt also gave empty result")
243
+ # Fall through to BLIP fallback
244
+ raise ValueError("Empty response from InternVL2")
245
+ else:
246
+ print(f"Alternative prompt worked: '{result}'")
247
+
248
+ # If we got a valid result, return it
249
+ if result and result.strip() != "":
250
+ return f"[InternVL2] {result}"
251
+ else:
252
+ # Try BLIP instead
253
+ raise ValueError("Empty response from InternVL2")
254
+
255
  except Exception as e:
256
  print(f"Error with InternVL2: {str(e)}")
257
  # If InternVL2 fails, fall back to BLIP if available
 
264
  inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
265
  out = blip_model.generate(**inputs, max_new_tokens=100)
266
  result = blip_processor.decode(out[0], skip_special_tokens=True)
267
+
268
+ # Check if BLIP result is empty
269
+ if not result or result.strip() == "":
270
+ return "BLIP model returned an empty response. The model may be having issues processing this image."
271
+
272
  return f"[BLIP] {result} (Note: Custom prompts not supported with BLIP fallback model)"
273
  except Exception as e:
274
  print(f"Error with BLIP: {str(e)}")