# Base image: CUDA 11.8 + cuDNN 8 runtime libraries on Ubuntu 22.04.
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04

# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG (not ENV) so it is not baked into the running container's
# environment.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment:
#   PYTHONUNBUFFERED         - Python writes stdout/stderr unbuffered, so log
#                              lines show up in `docker logs` immediately.
#   HF_HOME                  - root directory for Hugging Face caches.
#   TRANSFORMERS_CACHE       - cache directory used by transformers.
#   MPLCONFIGDIR             - writable config/cache directory for matplotlib.
#   PYTORCH_CUDA_ALLOC_CONF  - tunes PyTorch's CUDA caching allocator:
#                              max_split_size_mb:128 stops it from splitting
#                              cached blocks larger than 128 MB, which limits
#                              fragmentation. (It does not select a
#                              communication backend such as NCCL.)
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    MPLCONFIGDIR=/tmp/matplotlib \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

# Pre-create the cache/example directories and make them world-writable.
# NOTE(review): mode 777 presumably exists so the app can write here when the
# container is started under an arbitrary non-root UID - confirm, and tighten
# to a dedicated user/group if that is not required.
RUN mkdir -p \
        /app/.cache/huggingface/transformers \
        /tmp/matplotlib \
        /app/gradio_cached_examples \
    && chmod -R 777 /app /tmp/matplotlib

# System packages: compiler toolchain, git/curl with CA certificates, and the
# distribution's Python 3 with pip, headers and setuptools. The apt lists are
# removed in the same layer so they do not add to the image size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        python3-dev \
        python3-pip \
        python3-setuptools \
    && rm -rf /var/lib/apt/lists/*

# All following relative paths resolve against /app.
WORKDIR /app

# Generate /entrypoint.sh: it reports GPU visibility (nvidia-smi output when
# the tool is available, plus the CUDA/NVIDIA device variables) and then
# exec's the container command so that command replaces the wrapper shell.
# printf '%s\n' writes each argument as one script line verbatim; this avoids
# relying on `echo` interpreting "\n" escapes, which differs between shells.
# Single quotes keep ${...} and "$@" literal so they are expanded when the
# script runs, not while the image is built.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'echo "Checking NVIDIA GPU status..."' \
        'if ! command -v nvidia-smi &> /dev/null; then' \
        '    echo "WARNING: nvidia-smi command not found. NVIDIA driver might not be installed."' \
        'else' \
        '    echo "NVIDIA driver found. Running nvidia-smi:"' \
        '    nvidia-smi' \
        'fi' \
        'echo "Environment variables for GPU:"' \
        'echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"' \
        'echo "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}"' \
        'exec "$@"' \
        > /entrypoint.sh \
    && chmod +x /entrypoint.sh

# Copy the requirements file into the working directory.
# NOTE(review): the dependency install step later in this file pins its
# packages inline and does not read requirements.txt - confirm whether this
# copy is still needed, since any change to the file invalidates the build
# cache for every layer after it.
COPY requirements.txt .
# Python dependencies, installed in a deliberate order to avoid conflicts.
# Each group is its own pip invocation inside a single layer:
#   1. pip itself (NOTE(review): upgraded to whatever is latest - unpinned)
#   2. torch / torchvision CUDA 11.8 builds from the PyTorch wheel index
#   3. core numeric / HTTP libraries
#   4. typing-extensions, pinned before the packages that depend on it
#   5. Hugging Face stack (transformers, safetensors, huggingface_hub)
#   6. timm (vision models)
#   7. nest-asyncio (nested event loops)
#   8. accelerate, then lmdeploy
#   9. bitsandbytes
#  10. gradio
#  11. remaining utilities (packaging, pyyaml, tqdm, openai)
# NOTE(review): later invocations may still move packages pinned by earlier
# ones if their requirements demand it; check the build log for resolver
# warnings. Also confirm that the lmdeploy build pulled from PyPI matches the
# CUDA 11.8 torch wheels installed in step 2.
RUN pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir \
        torch==2.0.1+cu118 \
        torchvision==0.15.2+cu118 \
        --extra-index-url https://download.pytorch.org/whl/cu118 \
    && pip3 install --no-cache-dir numpy==1.24.3 scipy==1.11.3 requests==2.31.0 \
    && pip3 install --no-cache-dir typing-extensions==4.10.0 \
    && pip3 install --no-cache-dir transformers==4.37.2 safetensors==0.4.1 huggingface_hub==0.19.4 \
    && pip3 install --no-cache-dir timm==0.9.11 \
    && pip3 install --no-cache-dir nest-asyncio==1.5.8 \
    && pip3 install --no-cache-dir "accelerate==0.30.0" \
    && pip3 install --no-cache-dir "lmdeploy==0.5.3" \
    && pip3 install --no-cache-dir bitsandbytes==0.41.3 \
    && pip3 install --no-cache-dir gradio==3.38.0 \
    && pip3 install --no-cache-dir packaging==23.2 pyyaml==6.0.1 tqdm==4.66.1 openai==1.6.1

# Copy the application files into the current working directory.
COPY . .

# Re-create the runtime directories (relative to the working directory) after
# the copy and make them world-writable again, in case the copied build
# context replaced them or changed their permissions.
RUN mkdir -p gradio_cached_examples .cache/huggingface/transformers \
    && chmod -R 777 gradio_cached_examples .cache

# Document the port the application is expected to serve on.
EXPOSE 7860

# NOTE(review): no USER instruction is visible in this file, so the process
# runs as the base image's default user unless the runtime overrides it -
# consider adding a dedicated non-root user.

# Run the GPU status check wrapper first; it exec's the command given below.
ENTRYPOINT ["/entrypoint.sh"]

# Default command: start the application.
CMD ["python3", "app_internvl2.py"]