Upload InternVL2 implementation
Browse files
- Dockerfile +16 -7
- app_internvl2.py +38 -11
- requirements.txt +11 -7
Dockerfile
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
FROM nvidia/cuda:
|
2 |
|
3 |
# Set environment variables
|
4 |
ENV DEBIAN_FRONTEND=noninteractive
|
@@ -27,16 +27,25 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
27 |
# Create a working directory
|
28 |
WORKDIR /app
|
29 |
|
30 |
-
# Copy requirements file
|
31 |
COPY requirements.txt .
|
32 |
|
|
|
33 |
RUN pip3 install --no-cache-dir --upgrade pip && \
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
pip3 install --no-cache-dir -r requirements.txt && \
|
35 |
-
# Install
|
36 |
-
pip3 install --no-cache-dir
|
37 |
-
pip3 install --no-cache-dir ninja && \
|
38 |
-
# Install flash-attention for performance
|
39 |
-
pip3 install --no-cache-dir flash-attn
|
40 |
|
41 |
# Copy the application files
|
42 |
COPY . .
|
|
|
1 |
+
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
|
2 |
|
3 |
# Set environment variables
|
4 |
ENV DEBIAN_FRONTEND=noninteractive
|
|
|
27 |
# Create a working directory
|
28 |
WORKDIR /app
|
29 |
|
30 |
+
# Copy requirements file
|
31 |
COPY requirements.txt .
|
32 |
|
33 |
+
# Upgrade pip and install dependencies in careful order
|
34 |
RUN pip3 install --no-cache-dir --upgrade pip && \
|
35 |
+
# Install torch and torchvision first
|
36 |
+
pip3 install --no-cache-dir torch==2.0.1 torchvision==0.15.2 && \
|
37 |
+
# Install core dependencies
|
38 |
+
pip3 install --no-cache-dir numpy==1.24.3 scipy==1.11.3 requests==2.31.0 && \
|
39 |
+
# Install huggingface dependencies
|
40 |
+
pip3 install --no-cache-dir transformers==4.37.2 safetensors==0.4.1 huggingface_hub==0.19.4 && \
|
41 |
+
# Install acceleration libraries
|
42 |
+
pip3 install --no-cache-dir accelerate==0.27.2 bitsandbytes==0.41.3 && \
|
43 |
+
# Install gradio
|
44 |
+
pip3 install --no-cache-dir gradio==3.38.0 && \
|
45 |
+
# Install remaining requirements
|
46 |
pip3 install --no-cache-dir -r requirements.txt && \
|
47 |
+
# Install an lmdeploy release from the 0.5.x range (>=0.5.0,<0.6.0) that's known to work
|
48 |
+
pip3 install --no-cache-dir "lmdeploy>=0.5.0,<0.6.0"
|
|
|
|
|
|
|
49 |
|
50 |
# Copy the application files
|
51 |
COPY . .
|
app_internvl2.py
CHANGED
@@ -4,29 +4,46 @@ import os
|
|
4 |
import time
|
5 |
import numpy as np
|
6 |
import torch
|
7 |
-
import
|
8 |
-
|
9 |
-
# Import lmdeploy for InternVL2 model
|
10 |
-
from lmdeploy import pipeline, TurbomindEngineConfig
|
11 |
-
from lmdeploy.vl import load_image
|
12 |
|
13 |
# Set environment variables
|
14 |
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
-
|
|
|
19 |
|
20 |
-
# Global variables
|
21 |
internvl2_pipeline = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
def load_internvl2_model():
|
24 |
"""Load the InternVL2 model using lmdeploy"""
|
25 |
-
global internvl2_pipeline
|
26 |
|
27 |
# If already loaded, return
|
28 |
if internvl2_pipeline is not None:
|
29 |
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
print("Loading InternVL2 model...")
|
32 |
try:
|
@@ -41,11 +58,13 @@ def load_internvl2_model():
|
|
41 |
)
|
42 |
|
43 |
print("InternVL2 model loaded successfully!")
|
|
|
44 |
return True
|
45 |
except Exception as e:
|
46 |
print(f"Error loading InternVL2 model: {str(e)}")
|
47 |
if "CUDA out of memory" in str(e):
|
48 |
print("Not enough GPU memory for the model")
|
|
|
49 |
return False
|
50 |
|
51 |
def analyze_image(image, prompt):
|
@@ -53,9 +72,14 @@ def analyze_image(image, prompt):
|
|
53 |
try:
|
54 |
start_time = time.time()
|
55 |
|
|
|
|
|
|
|
|
|
|
|
56 |
# Make sure the model is loaded
|
57 |
if not load_internvl2_model():
|
58 |
-
return "Couldn't load InternVL2 model."
|
59 |
|
60 |
# Convert numpy array to PIL Image
|
61 |
if isinstance(image, np.ndarray):
|
@@ -113,6 +137,9 @@ def create_interface():
|
|
113 |
gr.Markdown("# Image Analysis with InternVL2-40B")
|
114 |
gr.Markdown("Upload an image to analyze it using the InternVL2-40B model.")
|
115 |
|
|
|
|
|
|
|
116 |
with gr.Row():
|
117 |
with gr.Column(scale=1):
|
118 |
input_image = gr.Image(type="pil", label="Upload Image")
|
|
|
4 |
import time
|
5 |
import numpy as np
|
6 |
import torch
|
7 |
+
import warnings
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# Set environment variables
|
10 |
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
|
11 |
|
12 |
+
# Suppress noisy warnings (two known messages, plus all UserWarnings) that might be caused by package version mismatches
|
13 |
+
warnings.filterwarnings("ignore", message=".*The 'nopython' keyword.*")
|
14 |
+
warnings.filterwarnings("ignore", message=".*Torch is not compiled with CUDA enabled.*")
|
15 |
+
warnings.filterwarnings("ignore", category=UserWarning)
|
16 |
|
17 |
+
# Global variables
|
18 |
internvl2_pipeline = None
|
19 |
+
MODEL_LOADED = False
|
20 |
+
USE_GPU = torch.cuda.is_available()
|
21 |
+
|
22 |
+
# Check if lmdeploy is available and try to import
|
23 |
+
try:
|
24 |
+
from lmdeploy import pipeline, TurbomindEngineConfig
|
25 |
+
LMDEPLOY_AVAILABLE = True
|
26 |
+
print("Successfully imported lmdeploy")
|
27 |
+
except ImportError:
|
28 |
+
LMDEPLOY_AVAILABLE = False
|
29 |
+
print("lmdeploy import failed. Will use a placeholder for demos.")
|
30 |
+
|
31 |
+
# Model configuration
|
32 |
+
MODEL_ID = "OpenGVLab/InternVL2-40B-AWQ" # 4-bit quantized model
|
33 |
|
34 |
def load_internvl2_model():
|
35 |
"""Load the InternVL2 model using lmdeploy"""
|
36 |
+
global internvl2_pipeline, MODEL_LOADED
|
37 |
|
38 |
# If already loaded, return
|
39 |
if internvl2_pipeline is not None:
|
40 |
return True
|
41 |
+
|
42 |
+
# If lmdeploy is not available, skip loading and report failure; analyze_image returns a demo placeholder message instead
|
43 |
+
if not LMDEPLOY_AVAILABLE:
|
44 |
+
print("lmdeploy not available. Using demo placeholder.")
|
45 |
+
MODEL_LOADED = False
|
46 |
+
return False
|
47 |
|
48 |
print("Loading InternVL2 model...")
|
49 |
try:
|
|
|
58 |
)
|
59 |
|
60 |
print("InternVL2 model loaded successfully!")
|
61 |
+
MODEL_LOADED = True
|
62 |
return True
|
63 |
except Exception as e:
|
64 |
print(f"Error loading InternVL2 model: {str(e)}")
|
65 |
if "CUDA out of memory" in str(e):
|
66 |
print("Not enough GPU memory for the model")
|
67 |
+
MODEL_LOADED = False
|
68 |
return False
|
69 |
|
70 |
def analyze_image(image, prompt):
|
|
|
72 |
try:
|
73 |
start_time = time.time()
|
74 |
|
75 |
+
# Skip model loading if lmdeploy is not available
|
76 |
+
if not LMDEPLOY_AVAILABLE:
|
77 |
+
return ("This is a demo placeholder. The actual model couldn't be loaded because lmdeploy "
|
78 |
+
"is not properly installed. Check your installation and dependencies.")
|
79 |
+
|
80 |
# Make sure the model is loaded
|
81 |
if not load_internvl2_model():
|
82 |
+
return "Couldn't load InternVL2 model. See logs for details."
|
83 |
|
84 |
# Convert numpy array to PIL Image
|
85 |
if isinstance(image, np.ndarray):
|
|
|
137 |
gr.Markdown("# Image Analysis with InternVL2-40B")
|
138 |
gr.Markdown("Upload an image to analyze it using the InternVL2-40B model.")
|
139 |
|
140 |
+
if not LMDEPLOY_AVAILABLE:
|
141 |
+
gr.Markdown("⚠️ **WARNING**: lmdeploy is not properly installed. This demo will not function correctly.", elem_classes=["warning-message"])
|
142 |
+
|
143 |
with gr.Row():
|
144 |
with gr.Column(scale=1):
|
145 |
input_image = gr.Image(type="pil", label="Upload Image")
|
requirements.txt
CHANGED
@@ -1,14 +1,18 @@
|
|
1 |
-
torch==2.1
|
2 |
-
torchvision==0.
|
3 |
-
transformers
|
4 |
-
lmdeploy
|
5 |
pillow==10.0.1
|
6 |
-
numpy
|
7 |
scipy==1.11.3
|
8 |
gradio==3.38.0
|
9 |
requests==2.31.0
|
10 |
accelerate==0.27.2
|
11 |
bitsandbytes==0.41.3
|
12 |
safetensors==0.4.1
|
13 |
-
openai
|
14 |
-
huggingface_hub
|
|
|
|
|
|
|
|
|
|
1 |
+
torch==2.0.1
|
2 |
+
torchvision==0.15.2
|
3 |
+
transformers==4.37.2
|
4 |
+
lmdeploy==0.5.3
|
5 |
pillow==10.0.1
|
6 |
+
numpy==1.24.3
|
7 |
scipy==1.11.3
|
8 |
gradio==3.38.0
|
9 |
requests==2.31.0
|
10 |
accelerate==0.27.2
|
11 |
bitsandbytes==0.41.3
|
12 |
safetensors==0.4.1
|
13 |
+
openai==1.6.1
|
14 |
+
huggingface_hub==0.19.4
|
15 |
+
packaging==23.2
|
16 |
+
pyyaml==6.0.1
|
17 |
+
tqdm==4.66.1
|
18 |
+
typing-extensions==4.8.0
|