Spaces:

mknolan
/

cursor_slides_internvl2

Paused

App Files Files Community

mknolan committed on Mar 11

Commit

57d5e90

verified ·

1 Parent(s): e59dc66

Upload InternVL2 implementation

Browse files

Files changed (3) hide show

Dockerfile +16 -7
app_internvl2.py +38 -11
requirements.txt +11 -7

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
 # Set environment variables
 ENV DEBIAN_FRONTEND=noninteractive
@@ -27,16 +27,25 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Create a working directory
 WORKDIR /app
-# Copy requirements file and install Python dependencies
 COPY requirements.txt .
 RUN pip3 install --no-cache-dir --upgrade pip && \
     pip3 install --no-cache-dir -r requirements.txt && \
-    # Install additional dependencies for lmdeploy
-    pip3 install --no-cache-dir cmake && \
-    pip3 install --no-cache-dir ninja && \
-    # Install flash-attention for performance
-    pip3 install --no-cache-dir flash-attn
 # Copy the application files
 COPY . .

+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
 # Set environment variables
 ENV DEBIAN_FRONTEND=noninteractive
 # Create a working directory
 WORKDIR /app
+# Copy requirements file
 COPY requirements.txt .
+# Upgrade pip and install dependencies in careful order
 RUN pip3 install --no-cache-dir --upgrade pip && \
+    # Install torch and torchvision first
+    pip3 install --no-cache-dir torch==2.0.1 torchvision==0.15.2 && \
+    # Install core dependencies
+    pip3 install --no-cache-dir numpy==1.24.3 scipy==1.11.3 requests==2.31.0 && \
+    # Install huggingface dependencies
+    pip3 install --no-cache-dir transformers==4.37.2 safetensors==0.4.1 huggingface_hub==0.19.4 && \
+    # Install acceleration libraries
+    pip3 install --no-cache-dir accelerate==0.27.2 bitsandbytes==0.41.3 && \
+    # Install gradio
+    pip3 install --no-cache-dir gradio==3.38.0 && \
+    # Install remaining requirements
     pip3 install --no-cache-dir -r requirements.txt && \
+    # Ensure lmdeploy stays within the 0.5.x range (a version range, not a single pin; requirements.txt pins 0.5.3)
+    pip3 install --no-cache-dir "lmdeploy>=0.5.0,<0.6.0"
 # Copy the application files
 COPY . .

app_internvl2.py CHANGED Viewed

@@ -4,29 +4,46 @@ import os
 import time
 import numpy as np
 import torch
-import math
-# Import lmdeploy for InternVL2 model
-from lmdeploy import pipeline, TurbomindEngineConfig
-from lmdeploy.vl import load_image
 # Set environment variables
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
-# Model configuration
-MODEL_ID = "OpenGVLab/InternVL2-40B-AWQ"  # 4-bit quantized model
-USE_GPU = torch.cuda.is_available()
-# Global variables for model
 internvl2_pipeline = None
 def load_internvl2_model():
     """Load the InternVL2 model using lmdeploy"""
-    global internvl2_pipeline
     # If already loaded, return
     if internvl2_pipeline is not None:
         return True
     print("Loading InternVL2 model...")
     try:
@@ -41,11 +58,13 @@ def load_internvl2_model():
         )
         print("InternVL2 model loaded successfully!")
         return True
     except Exception as e:
         print(f"Error loading InternVL2 model: {str(e)}")
         if "CUDA out of memory" in str(e):
             print("Not enough GPU memory for the model")
         return False
 def analyze_image(image, prompt):
@@ -53,9 +72,14 @@ def analyze_image(image, prompt):
     try:
         start_time = time.time()
         # Make sure the model is loaded
         if not load_internvl2_model():
-            return "Couldn't load InternVL2 model."
         # Convert numpy array to PIL Image
         if isinstance(image, np.ndarray):
@@ -113,6 +137,9 @@ def create_interface():
         gr.Markdown("# Image Analysis with InternVL2-40B")
         gr.Markdown("Upload an image to analyze it using the InternVL2-40B model.")
         with gr.Row():
             with gr.Column(scale=1):
                 input_image = gr.Image(type="pil", label="Upload Image")

 import time
 import numpy as np
 import torch
+import warnings
 # Set environment variables
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
+# Suppress warnings that might be caused by package version mismatches (note: the last filter below silences every UserWarning, not just specific ones)
+warnings.filterwarnings("ignore", message=".*The 'nopython' keyword.*")
+warnings.filterwarnings("ignore", message=".*Torch is not compiled with CUDA enabled.*")
+warnings.filterwarnings("ignore", category=UserWarning)
+# Global variables
 internvl2_pipeline = None
+MODEL_LOADED = False
+USE_GPU = torch.cuda.is_available()
+# Check if lmdeploy is available and try to import
+try:
+    from lmdeploy import pipeline, TurbomindEngineConfig
+    LMDEPLOY_AVAILABLE = True
+    print("Successfully imported lmdeploy")
+except ImportError:
+    LMDEPLOY_AVAILABLE = False
+    print("lmdeploy import failed. Will use a placeholder for demos.")
+# Model configuration
+MODEL_ID = "OpenGVLab/InternVL2-40B-AWQ"  # 4-bit quantized model
 def load_internvl2_model():
     """Load the InternVL2 model using lmdeploy"""
+    global internvl2_pipeline, MODEL_LOADED
     # If already loaded, return
     if internvl2_pipeline is not None:
         return True
+    # If lmdeploy is not available, we'll use a demo placeholder
+    if not LMDEPLOY_AVAILABLE:
+        print("lmdeploy not available. Using demo placeholder.")
+        MODEL_LOADED = False
+        return False
     print("Loading InternVL2 model...")
     try:
         )
         print("InternVL2 model loaded successfully!")
+        MODEL_LOADED = True
         return True
     except Exception as e:
         print(f"Error loading InternVL2 model: {str(e)}")
         if "CUDA out of memory" in str(e):
             print("Not enough GPU memory for the model")
+        MODEL_LOADED = False
         return False
 def analyze_image(image, prompt):
     try:
         start_time = time.time()
+        # Skip model loading if lmdeploy is not available
+        if not LMDEPLOY_AVAILABLE:
+            return ("This is a demo placeholder. The actual model couldn't be loaded because lmdeploy "
+                   "is not properly installed. Check your installation and dependencies.")
         # Make sure the model is loaded
         if not load_internvl2_model():
+            return "Couldn't load InternVL2 model. See logs for details."
         # Convert numpy array to PIL Image
         if isinstance(image, np.ndarray):
         gr.Markdown("# Image Analysis with InternVL2-40B")
         gr.Markdown("Upload an image to analyze it using the InternVL2-40B model.")
+        if not LMDEPLOY_AVAILABLE:
+            gr.Markdown("⚠️ **WARNING**: lmdeploy is not properly installed. This demo will not function correctly.", elem_classes=["warning-message"])
         with gr.Row():
             with gr.Column(scale=1):
                 input_image = gr.Image(type="pil", label="Upload Image")

requirements.txt CHANGED Viewed

@@ -1,14 +1,18 @@
-torch==2.1.0
-torchvision==0.16.0
-transformers>=4.37.2
-lmdeploy>=0.5.3
 pillow==10.0.1
-numpy>=1.24.0
 scipy==1.11.3
 gradio==3.38.0
 requests==2.31.0
 accelerate==0.27.2
 bitsandbytes==0.41.3
 safetensors==0.4.1
-openai
-huggingface_hub

+torch==2.0.1
+torchvision==0.15.2
+transformers==4.37.2
+lmdeploy==0.5.3
 pillow==10.0.1
+numpy==1.24.3
 scipy==1.11.3
 gradio==3.38.0
 requests==2.31.0
 accelerate==0.27.2
 bitsandbytes==0.41.3
 safetensors==0.4.1
+openai==1.6.1
+huggingface_hub==0.19.4
+packaging==23.2
+pyyaml==6.0.1
+tqdm==4.66.1
+typing-extensions==4.8.0