mknolan committed on
Commit
45a88d3
·
verified ·
1 Parent(s): 02532a9

Upload InternVL2 implementation

Browse files
Files changed (3) hide show
  1. Dockerfile +2 -0
  2. app_internvl2.py +31 -6
  3. requirements.txt +2 -1
Dockerfile CHANGED
@@ -60,6 +60,8 @@ RUN pip3 install --no-cache-dir --upgrade pip && \
60
  pip3 install --no-cache-dir transformers==4.37.2 safetensors==0.4.1 huggingface_hub==0.19.4 && \
61
  # Install timm for vision models
62
  pip3 install --no-cache-dir timm==0.9.11 && \
 
 
63
  # Install lmdeploy and its dependencies first
64
  pip3 install --no-cache-dir "accelerate==0.30.0" && \
65
  pip3 install --no-cache-dir "lmdeploy==0.5.3" && \
 
60
  pip3 install --no-cache-dir transformers==4.37.2 safetensors==0.4.1 huggingface_hub==0.19.4 && \
61
  # Install timm for vision models
62
  pip3 install --no-cache-dir timm==0.9.11 && \
63
+ # Install nest-asyncio for handling nested event loops
64
+ pip3 install --no-cache-dir nest-asyncio==1.5.8 && \
65
  # Install lmdeploy and its dependencies first
66
  pip3 install --no-cache-dir "accelerate==0.30.0" && \
67
  pip3 install --no-cache-dir "lmdeploy==0.5.3" && \
app_internvl2.py CHANGED
@@ -8,6 +8,11 @@ import warnings
8
  import stat
9
  import subprocess
10
  import sys
 
 
 
 
 
11
 
12
  # Set environment variables
13
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
@@ -143,11 +148,14 @@ def load_internvl2_model():
143
  # Configure for AWQ quantized model
144
  backend_config = TurbomindEngineConfig(model_format='awq')
145
 
146
- # Create pipeline
147
  internvl2_pipeline = pipeline(
148
  MODEL_ID,
149
- backend_config=backend_config,
150
- log_level='INFO'
 
 
 
151
  )
152
 
153
  print("InternVL2 model loaded successfully!")
@@ -189,11 +197,28 @@ def analyze_image(image, prompt):
189
  # If somehow it's already a PIL Image
190
  image_pil = image.convert('RGB')
191
 
192
- # Run inference with the model
193
- response = internvl2_pipeline((prompt, image_pil))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
  # Get the response text
196
- result = response.text
197
 
198
  elapsed_time = time.time() - start_time
199
  return result
 
8
  import stat
9
  import subprocess
10
  import sys
11
+ import asyncio
12
+ import nest_asyncio
13
+
14
+ # Apply nest_asyncio to allow nested event loops
15
+ nest_asyncio.apply()
16
 
17
  # Set environment variables
18
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 
148
  # Configure for AWQ quantized model
149
  backend_config = TurbomindEngineConfig(model_format='awq')
150
 
151
+ # Create pipeline with non-streaming mode to avoid asyncio conflicts
152
  internvl2_pipeline = pipeline(
153
  MODEL_ID,
154
+ backend_config=backend_config,
155
+ log_level='INFO',
156
+ model_name_or_path=None,
157
+ backend_name="turbomind",
158
+ stream=False # Important: disable streaming to avoid asyncio issues
159
  )
160
 
161
  print("InternVL2 model loaded successfully!")
 
197
  # If somehow it's already a PIL Image
198
  image_pil = image.convert('RGB')
199
 
200
+ # Run inference with the model, handling event loop manually
201
+ loop = asyncio.get_event_loop()
202
+ if loop.is_running():
203
+ # If we're in a running event loop (like Gradio's),
204
+ # we need to use run_in_executor for blocking operations
205
+ print("Using threaded execution for model inference")
206
+ # Define a function that will run in a separate thread
207
+ def run_inference():
208
+ return internvl2_pipeline((prompt, image_pil))
209
+
210
+ # Run the inference in a thread pool executor
211
+ response = loop.run_in_executor(None, run_inference)
212
+ # Wait for the result
213
+ if hasattr(response, "result"):
214
+ response = response.result()
215
+ else:
216
+ # Standard synchronous execution
217
+ print("Using standard execution for model inference")
218
+ response = internvl2_pipeline((prompt, image_pil))
219
 
220
  # Get the response text
221
+ result = response.text if hasattr(response, "text") else str(response)
222
 
223
  elapsed_time = time.time() - start_time
224
  return result
requirements.txt CHANGED
@@ -16,4 +16,5 @@ packaging==23.2
16
  pyyaml==6.0.1
17
  tqdm==4.66.1
18
  typing-extensions==4.10.0
19
- timm==0.9.11
 
 
16
  pyyaml==6.0.1
17
  tqdm==4.66.1
18
  typing-extensions==4.10.0
19
+ timm==0.9.11
20
+ nest-asyncio==1.5.8