Upload InternVL2 implementation
Browse files - app_internvl2.py (+59 -2)
app_internvl2.py
CHANGED
@@ -197,9 +197,61 @@ def analyze_image(image, prompt):
|
|
197 |
if internvl2_model is not None:
|
198 |
try:
|
199 |
print("Running inference with InternVL2...")
|
|
|
|
|
|
|
200 |
response = internvl2_model((prompt, pil_image))
|
201 |
-
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
except Exception as e:
|
204 |
print(f"Error with InternVL2: {str(e)}")
|
205 |
# If InternVL2 fails, fall back to BLIP if available
|
@@ -212,6 +264,11 @@ def analyze_image(image, prompt):
|
|
212 |
inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
|
213 |
out = blip_model.generate(**inputs, max_new_tokens=100)
|
214 |
result = blip_processor.decode(out[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
215 |
return f"[BLIP] {result} (Note: Custom prompts not supported with BLIP fallback model)"
|
216 |
except Exception as e:
|
217 |
print(f"Error with BLIP: {str(e)}")
|
|
|
197 |
if internvl2_model is not None:
|
198 |
try:
|
199 |
print("Running inference with InternVL2...")
|
200 |
+
print(f"Using prompt: '{prompt}'")
|
201 |
+
|
202 |
+
# Run the model and capture the raw response
|
203 |
response = internvl2_model((prompt, pil_image))
|
204 |
+
|
205 |
+
# Print debug info about the response
|
206 |
+
print(f"Response type: {type(response)}")
|
207 |
+
print(f"Response attributes: {dir(response) if hasattr(response, '__dir__') else 'No dir available'}")
|
208 |
+
|
209 |
+
# Try different ways to extract the text
|
210 |
+
if hasattr(response, "text"):
|
211 |
+
result = response.text
|
212 |
+
print(f"Found 'text' attribute: '{result}'")
|
213 |
+
elif hasattr(response, "response"):
|
214 |
+
result = response.response
|
215 |
+
print(f"Found 'response' attribute: '{result}'")
|
216 |
+
elif hasattr(response, "generated_text"):
|
217 |
+
result = response.generated_text
|
218 |
+
print(f"Found 'generated_text' attribute: '{result}'")
|
219 |
+
else:
|
220 |
+
# If no attribute worked, convert the whole response to string
|
221 |
+
result = str(response)
|
222 |
+
print(f"Using string conversion: '{result}'")
|
223 |
+
|
224 |
+
# Check if we got an empty result
|
225 |
+
if not result or result.strip() == "":
|
226 |
+
print("WARNING: Received empty response from InternVL2")
|
227 |
+
# Try an alternative prompt to see if that works
|
228 |
+
print("Trying alternative prompt...")
|
229 |
+
alt_prompt = "This is an image. Describe what you see in detail."
|
230 |
+
response2 = internvl2_model((alt_prompt, pil_image))
|
231 |
+
|
232 |
+
if hasattr(response2, "text"):
|
233 |
+
result = response2.text
|
234 |
+
elif hasattr(response2, "response"):
|
235 |
+
result = response2.response
|
236 |
+
elif hasattr(response2, "generated_text"):
|
237 |
+
result = response2.generated_text
|
238 |
+
else:
|
239 |
+
result = str(response2)
|
240 |
+
|
241 |
+
if not result or result.strip() == "":
|
242 |
+
print("Alternative prompt also gave empty result")
|
243 |
+
# Fall through to BLIP fallback
|
244 |
+
raise ValueError("Empty response from InternVL2")
|
245 |
+
else:
|
246 |
+
print(f"Alternative prompt worked: '{result}'")
|
247 |
+
|
248 |
+
# If we got a valid result, return it
|
249 |
+
if result and result.strip() != "":
|
250 |
+
return f"[InternVL2] {result}"
|
251 |
+
else:
|
252 |
+
# Try BLIP instead
|
253 |
+
raise ValueError("Empty response from InternVL2")
|
254 |
+
|
255 |
except Exception as e:
|
256 |
print(f"Error with InternVL2: {str(e)}")
|
257 |
# If InternVL2 fails, fall back to BLIP if available
|
|
|
264 |
inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
|
265 |
out = blip_model.generate(**inputs, max_new_tokens=100)
|
266 |
result = blip_processor.decode(out[0], skip_special_tokens=True)
|
267 |
+
|
268 |
+
# Check if BLIP result is empty
|
269 |
+
if not result or result.strip() == "":
|
270 |
+
return "BLIP model returned an empty response. The model may be having issues processing this image."
|
271 |
+
|
272 |
return f"[BLIP] {result} (Note: Custom prompts not supported with BLIP fallback model)"
|
273 |
except Exception as e:
|
274 |
print(f"Error with BLIP: {str(e)}")
|