FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV HF_HOME=/app/.cache/huggingface
ENV TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers
ENV MPLCONFIGDIR=/tmp/matplotlib
# Limit the CUDA caching allocator's split size to reduce memory fragmentation
ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

# Create necessary directories with proper permissions
RUN mkdir -p /app/.cache/huggingface/transformers && \
    mkdir -p /tmp/matplotlib && \
    mkdir -p /app/gradio_cached_examples && \
    chmod -R 777 /app && \
    chmod -R 777 /tmp/matplotlib

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    git \
    curl \
    ca-certificates \
    python3-pip \
    python3-dev \
    python3-setuptools \
    && rm -rf /var/lib/apt/lists/*

# Create a working directory
WORKDIR /app

# Add a script to check GPU status at startup
RUN printf '%s\n' \
    '#!/bin/bash' \
    'echo "Checking NVIDIA GPU status..."' \
    'if ! command -v nvidia-smi &> /dev/null; then' \
    '    echo "WARNING: nvidia-smi command not found. NVIDIA driver might not be installed."' \
    'else' \
    '    echo "NVIDIA driver found. Running nvidia-smi:"' \
    '    nvidia-smi' \
    'fi' \
    'echo "Environment variables for GPU:"' \
    'echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"' \
    'echo "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}"' \
    'exec "$@"' > /entrypoint.sh && \
    chmod +x /entrypoint.sh

# Copy requirements file
COPY requirements.txt .

# Upgrade pip and install dependencies in specific order to avoid conflicts
RUN pip3 install --no-cache-dir --upgrade pip && \
    # Install torch and torchvision first with CUDA support
    pip3 install --no-cache-dir torch==2.0.1+cu118 torchvision==0.15.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 && \
    # Install core dependencies
    pip3 install --no-cache-dir numpy==1.24.3 scipy==1.11.3 requests==2.31.0 && \
    # Install typing-extensions first to ensure proper version for other packages
    pip3 install --no-cache-dir typing-extensions==4.10.0 && \
    # Install huggingface dependencies
    pip3 install --no-cache-dir transformers==4.37.2 safetensors==0.4.1 huggingface_hub==0.19.4 && \
    # Install timm for vision models
    pip3 install --no-cache-dir timm==0.9.11 && \
    # Install nest-asyncio for handling nested event loops
    pip3 install --no-cache-dir nest-asyncio==1.5.8 && \
    # Install lmdeploy and its dependencies first
    pip3 install --no-cache-dir "accelerate==0.30.0" && \
    pip3 install --no-cache-dir "lmdeploy==0.5.3" && \
    # Install other acceleration libraries
    pip3 install --no-cache-dir bitsandbytes==0.41.3 && \
    # Install gradio
    pip3 install --no-cache-dir gradio==3.38.0 && \
    # Install any remaining requirements
    pip3 install --no-cache-dir packaging==23.2 pyyaml==6.0.1 tqdm==4.66.1 openai==1.6.1

# Copy the application files
COPY . .

# Make sure the runtime directories exist and have proper permissions
RUN mkdir -p gradio_cached_examples && \
    chmod -R 777 gradio_cached_examples && \
    mkdir -p .cache/huggingface/transformers && \
    chmod -R 777 .cache

# Make port 7860 available for the app
EXPOSE 7860

# Use our entrypoint script to check GPU status before starting the app
ENTRYPOINT ["/entrypoint.sh"]

# Start the application
CMD ["python3", "app_internvl2.py"]
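
# Example build/run commands (a sketch for local use; the image tag
# "internvl2-app" is an arbitrary placeholder, and "--gpus all" assumes the
# NVIDIA Container Toolkit is installed on the host):
#
#   docker build -t internvl2-app .
#   docker run --gpus all -p 7860:7860 internvl2-app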