mknolan committed
Commit 57d5e90 · verified · 1 Parent(s): e59dc66

Upload InternVL2 implementation

Files changed (3):
  1. Dockerfile +16 -7
  2. app_internvl2.py +38 -11
  3. requirements.txt +11 -7
Dockerfile CHANGED
@@ -1,4 +1,4 @@
- FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
+ FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
 
  # Set environment variables
  ENV DEBIAN_FRONTEND=noninteractive
@@ -27,16 +27,25 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  # Create a working directory
  WORKDIR /app
 
- # Copy requirements file and install Python dependencies
+ # Copy requirements file
  COPY requirements.txt .
 
+ # Upgrade pip and install dependencies in careful order
  RUN pip3 install --no-cache-dir --upgrade pip && \
+     # Install torch and torchvision first
+     pip3 install --no-cache-dir torch==2.0.1 torchvision==0.15.2 && \
+     # Install core dependencies
+     pip3 install --no-cache-dir numpy==1.24.3 scipy==1.11.3 requests==2.31.0 && \
+     # Install huggingface dependencies
+     pip3 install --no-cache-dir transformers==4.37.2 safetensors==0.4.1 huggingface_hub==0.19.4 && \
+     # Install acceleration libraries
+     pip3 install --no-cache-dir accelerate==0.27.2 bitsandbytes==0.41.3 && \
+     # Install gradio
+     pip3 install --no-cache-dir gradio==3.38.0 && \
+     # Install remaining requirements
      pip3 install --no-cache-dir -r requirements.txt && \
-     # Install additional dependencies for lmdeploy
-     pip3 install --no-cache-dir cmake && \
-     pip3 install --no-cache-dir ninja && \
-     # Install flash-attention for performance
-     pip3 install --no-cache-dir flash-attn
+     # Install specific version of lmdeploy that's known to work
+     pip3 install --no-cache-dir "lmdeploy>=0.5.0,<0.6.0"
 
  # Copy the application files
  COPY . .
app_internvl2.py CHANGED
@@ -4,29 +4,46 @@ import os
  import time
  import numpy as np
  import torch
- import math
-
- # Import lmdeploy for InternVL2 model
- from lmdeploy import pipeline, TurbomindEngineConfig
- from lmdeploy.vl import load_image
+ import warnings
 
  # Set environment variables
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 
- # Model configuration
- MODEL_ID = "OpenGVLab/InternVL2-40B-AWQ" # 4-bit quantized model
- USE_GPU = torch.cuda.is_available()
+ # Suppress specific warnings that might be caused by package version mismatches
+ warnings.filterwarnings("ignore", message=".*The 'nopython' keyword.*")
+ warnings.filterwarnings("ignore", message=".*Torch is not compiled with CUDA enabled.*")
+ warnings.filterwarnings("ignore", category=UserWarning)
 
- # Global variables for model
+ # Global variables
  internvl2_pipeline = None
+ MODEL_LOADED = False
+ USE_GPU = torch.cuda.is_available()
+
+ # Check if lmdeploy is available and try to import
+ try:
+     from lmdeploy import pipeline, TurbomindEngineConfig
+     LMDEPLOY_AVAILABLE = True
+     print("Successfully imported lmdeploy")
+ except ImportError:
+     LMDEPLOY_AVAILABLE = False
+     print("lmdeploy import failed. Will use a placeholder for demos.")
+
+ # Model configuration
+ MODEL_ID = "OpenGVLab/InternVL2-40B-AWQ" # 4-bit quantized model
 
  def load_internvl2_model():
      """Load the InternVL2 model using lmdeploy"""
-     global internvl2_pipeline
+     global internvl2_pipeline, MODEL_LOADED
 
      # If already loaded, return
      if internvl2_pipeline is not None:
          return True
+
+     # If lmdeploy is not available, we'll use a demo placeholder
+     if not LMDEPLOY_AVAILABLE:
+         print("lmdeploy not available. Using demo placeholder.")
+         MODEL_LOADED = False
+         return False
 
      print("Loading InternVL2 model...")
      try:
@@ -41,11 +58,13 @@ def load_internvl2_model():
          )
 
          print("InternVL2 model loaded successfully!")
+         MODEL_LOADED = True
          return True
      except Exception as e:
          print(f"Error loading InternVL2 model: {str(e)}")
          if "CUDA out of memory" in str(e):
              print("Not enough GPU memory for the model")
+         MODEL_LOADED = False
          return False
 
  def analyze_image(image, prompt):
@@ -53,9 +72,14 @@ def analyze_image(image, prompt):
      try:
          start_time = time.time()
 
+         # Skip model loading if lmdeploy is not available
+         if not LMDEPLOY_AVAILABLE:
+             return ("This is a demo placeholder. The actual model couldn't be loaded because lmdeploy "
+                     "is not properly installed. Check your installation and dependencies.")
+
          # Make sure the model is loaded
          if not load_internvl2_model():
-             return "Couldn't load InternVL2 model."
+             return "Couldn't load InternVL2 model. See logs for details."
 
          # Convert numpy array to PIL Image
          if isinstance(image, np.ndarray):
@@ -113,6 +137,9 @@ def create_interface():
          gr.Markdown("# Image Analysis with InternVL2-40B")
          gr.Markdown("Upload an image to analyze it using the InternVL2-40B model.")
 
+         if not LMDEPLOY_AVAILABLE:
+             gr.Markdown("⚠️ **WARNING**: lmdeploy is not properly installed. This demo will not function correctly.", elem_classes=["warning-message"])
+
          with gr.Row():
              with gr.Column(scale=1):
                  input_image = gr.Image(type="pil", label="Upload Image")
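
Note for reviewers: the hunks above skip the body of the pipeline(...) call inside load_internvl2_model (only its closing parenthesis is visible at new line 58). The following is a minimal sketch of how an lmdeploy pipeline for this AWQ checkpoint is typically constructed and queried, assuming lmdeploy 0.5.x as pinned in this commit; the session_len value, the sample URL, and the prompt are illustrative placeholders, not values taken from the commit.

    # Sketch only: not the exact code in app_internvl2.py.
    from lmdeploy import pipeline, TurbomindEngineConfig
    from lmdeploy.vl import load_image

    pipe = pipeline(
        "OpenGVLab/InternVL2-40B-AWQ",      # MODEL_ID used by the app
        backend_config=TurbomindEngineConfig(
            model_format="awq",             # 4-bit AWQ quantized weights
            session_len=8192,               # assumed context length
        ),
    )

    image = load_image("https://example.com/sample.jpg")  # placeholder URL
    response = pipe(("Describe this image in detail.", image))
    print(response.text)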
requirements.txt CHANGED
@@ -1,14 +1,18 @@
- torch==2.1.0
- torchvision==0.16.0
- transformers>=4.37.2
- lmdeploy>=0.5.3
+ torch==2.0.1
+ torchvision==0.15.2
+ transformers==4.37.2
+ lmdeploy==0.5.3
  pillow==10.0.1
- numpy>=1.24.0
+ numpy==1.24.3
  scipy==1.11.3
  gradio==3.38.0
  requests==2.31.0
  accelerate==0.27.2
  bitsandbytes==0.41.3
  safetensors==0.4.1
- openai
- huggingface_hub
+ openai==1.6.1
+ huggingface_hub==0.19.4
+ packaging==23.2
+ pyyaml==6.0.1
+ tqdm==4.66.1
+ typing-extensions==4.8.0
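
Since the Dockerfile installs requirements.txt before its final "lmdeploy>=0.5.0,<0.6.0" step, the lmdeploy==0.5.3 pin above already satisfies that constraint and is the version that ends up in the image. A quick way to confirm the pinned stack resolves together inside the built container is a small version-check script like the sketch below (not part of this commit):

    # Sanity check for the pinned dependency stack; run inside the container.
    import torch
    import transformers
    import gradio

    print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
    print("transformers:", transformers.__version__)
    print("gradio:", gradio.__version__)

    try:
        import lmdeploy
        print("lmdeploy:", lmdeploy.__version__)
    except ImportError as err:
        # Mirrors the fallback path in app_internvl2.py when the import fails
        print("lmdeploy import failed:", err)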