mknolan committed
Commit fb7360f · verified · 1 Parent(s): 3f1523d

Upload InternVL2 implementation

Files changed (1)
  1. app_internvl2.py +58 -68
app_internvl2.py CHANGED
@@ -133,46 +133,48 @@ def load_model():
         print("Cannot load models without GPU acceleration.")
         return False
 
-    # First try to load InternVL2 if lmdeploy is available
+    # Try to load BLIP first since it's more reliable
+    if HAS_BLIP:
+        try:
+            print("Loading BLIP model...")
+            blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+            blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cuda")
+            print("BLIP model loaded successfully!")
+        except Exception as e:
+            print(f"Failed to load BLIP: {str(e)}")
+            blip_processor = None
+            blip_model = None
+
+    # Then try InternVL2 if lmdeploy is available
     if HAS_LMDEPLOY:
         try:
             print("Attempting to load InternVL2 model...")
-            # Configure for AWQ quantized model
+            # Configure for AWQ quantized model with larger context
             backend_config = TurbomindEngineConfig(
                 model_format='awq',
-                session_len=2048  # Explicitly set session length
+                session_len=4096,  # Increased session length
+                max_batch_size=1,  # Limit batch size to reduce memory usage
+                cache_max_entry_count=0.3,  # Adjust cache to optimize for single requests
+                tp=1  # Set tensor parallelism to 1 (use single GPU)
             )
 
-            # Set to non-streaming mode
+            # Set to non-streaming mode with explicit token limits
             internvl2_model = pipeline(
                 "OpenGVLab/InternVL2-40B-AWQ",
                 backend_config=backend_config,
                 model_name_or_path=None,
                 backend_name="turbomind",
                 stream=False,  # Disable streaming
+                max_new_tokens=512,  # Explicitly set max new tokens
             )
 
             print("InternVL2 model loaded successfully!")
-            return True
         except Exception as e:
             print(f"Failed to load InternVL2: {str(e)}")
             internvl2_model = None
 
-    # If InternVL2 failed or lmdeploy not available, try BLIP
-    if HAS_BLIP:
-        try:
-            print("Falling back to BLIP model...")
-            blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-            blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cuda")
-            print("BLIP model loaded successfully!")
-            return True
-        except Exception as e:
-            print(f"Failed to load BLIP: {str(e)}")
-            blip_processor = None
-            blip_model = None
-
-    print("Could not load any model")
-    return False
+    # Return True if at least one model is loaded
+    return (blip_model is not None and blip_processor is not None) or (internvl2_model is not None)
 
 # Try to load a model at startup
 MODEL_LOADED = load_model()
@@ -192,15 +194,44 @@ def analyze_image(image, prompt):
         pil_image = Image.fromarray(image).convert('RGB')
     else:
         pil_image = image.convert('RGB')
-
-    # If we have InternVL2 loaded, use it
+
+    # Try BLIP first since it's more reliable
+    if blip_model is not None and blip_processor is not None:
+        try:
+            print("Running inference with BLIP...")
+            # BLIP doesn't use prompts the same way, simplify
+            inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
+            out = blip_model.generate(**inputs, max_length=80, min_length=10, num_beams=5)
+            result = blip_processor.decode(out[0], skip_special_tokens=True)
+
+            # Check if BLIP result is empty
+            if not result or result.strip() == "":
+                print("BLIP model returned an empty response")
+                # Only fall through to InternVL2 if BLIP fails
+                raise ValueError("Empty response from BLIP")
+
+            return f"[BLIP] {result}"
+        except Exception as e:
+            print(f"Error with BLIP: {str(e)}")
+            # If BLIP fails, try InternVL2 if available
+
+    # Try InternVL2 if available
     if internvl2_model is not None:
         try:
             print("Running inference with InternVL2...")
             print(f"Using prompt: '{prompt}'")
 
-            # Run the model and capture the raw response
-            response = internvl2_model((prompt, pil_image))
+            # Create a specifically formatted prompt for InternVL2
+            formatted_prompt = f"<image>\n{prompt}"
+            print(f"Formatted prompt: '{formatted_prompt}'")
+
+            # Run the model with more explicit parameters
+            response = internvl2_model(
+                (formatted_prompt, pil_image),
+                max_new_tokens=512,  # Set higher token limit
+                temperature=0.7,  # Add temperature for better generation
+                top_p=0.9  # Add top_p for better generation
+            )
 
             # Print debug info about the response
             print(f"Response type: {type(response)}")
@@ -224,54 +255,13 @@ def analyze_image(image, prompt):
             # Check if we got an empty result
             if not result or result.strip() == "":
                 print("WARNING: Received empty response from InternVL2")
-                # Try an alternative prompt to see if that works
-                print("Trying alternative prompt...")
-                alt_prompt = "This is an image. Describe what you see in detail."
-                response2 = internvl2_model((alt_prompt, pil_image))
-
-                if hasattr(response2, "text"):
-                    result = response2.text
-                elif hasattr(response2, "response"):
-                    result = response2.response
-                elif hasattr(response2, "generated_text"):
-                    result = response2.generated_text
-                else:
-                    result = str(response2)
-
-                if not result or result.strip() == "":
-                    print("Alternative prompt also gave empty result")
-                    # Fall through to BLIP fallback
-                    raise ValueError("Empty response from InternVL2")
-                else:
-                    print(f"Alternative prompt worked: '{result}'")
+                return "InternVL2 failed to analyze the image (empty response). This may be due to token limits."
 
-            # If we got a valid result, return it
-            if result and result.strip() != "":
-                return f"[InternVL2] {result}"
-            else:
-                # Try BLIP instead
-                raise ValueError("Empty response from InternVL2")
+            return f"[InternVL2] {result}"
 
         except Exception as e:
             print(f"Error with InternVL2: {str(e)}")
-            # If InternVL2 fails, fall back to BLIP if available
-
-    # If we have BLIP loaded, use it
-    if blip_model is not None and blip_processor is not None:
-        try:
-            print("Running inference with BLIP...")
-            # BLIP doesn't use prompts the same way, simplify
-            inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
-            out = blip_model.generate(**inputs, max_new_tokens=100)
-            result = blip_processor.decode(out[0], skip_special_tokens=True)
-
-            # Check if BLIP result is empty
-            if not result or result.strip() == "":
-                return "BLIP model returned an empty response. The model may be having issues processing this image."
-
-            return f"[BLIP] {result} (Note: Custom prompts not supported with BLIP fallback model)"
-        except Exception as e:
-            print(f"Error with BLIP: {str(e)}")
+            return f"Error with InternVL2: {str(e)}"
 
     return "No model was able to analyze the image. See logs for details."
 
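For quick local verification, the BLIP-first path introduced above can be exercised outside the app with a minimal sketch like the one below (standard transformers API; "example.jpg" is a placeholder test image, and a CUDA GPU is assumed, as in the app):

# Minimal sketch of the BLIP captioning call used as the primary path above.
# Assumes transformers is installed and a CUDA GPU is available.
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cuda")

image = Image.open("example.jpg").convert("RGB")  # placeholder path, not part of this repo
inputs = processor(image, return_tensors="pt").to("cuda")
out = model.generate(**inputs, max_length=80, min_length=10, num_beams=5)
print(processor.decode(out[0], skip_special_tokens=True))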
 
 