# Base image: official PyTorch 2.0.1 build with CUDA 11.7 and cuDNN 8 (runtime variant).
# NOTE(review): `-runtime` tags normally ship without the CUDA compiler (nvcc).
# If any later step compiles CUDA extensions from source, the matching `-devel`
# tag may be required -- confirm against an actual build.
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

# Build-time only: stop apt/debconf from prompting during package installs.
# Declared with ARG so RUN steps in this stage still see it, but it is not
# persisted into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
#   PYTHONUNBUFFERED         flush Python stdout/stderr immediately (live container logs)
#   HF_HOME                  root directory for Hugging Face caches
#   TRANSFORMERS_CACHE       transformers model cache directory, kept for compatibility
#                            (NOTE(review): newer transformers releases prefer HF_HOME -- confirm
#                            whether this variable is still needed)
#   PYTORCH_CUDA_ALLOC_CONF  caching-allocator tuning: limit split block size to 128 MB
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

# Pre-create the Hugging Face cache tree and open up permissions on /app.
# NOTE(review): mode 777 is presumably here so the container can run under an
# arbitrary (non-root) UID and still write to the cache -- confirm, and narrow
# the permissions if a fixed runtime user is introduced.
RUN mkdir -p /app/.cache/huggingface/transformers \
    && chmod -R 777 /app

# System packages: compiler toolchain and build helpers, plus git/curl/CA
# certificates. `update` and `install` share one layer and the apt lists are
# removed in that same layer so they never reach the image.
# Packages are listed alphabetically to keep diffs small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        ninja-build \
        python3-dev \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# All following relative paths (COPY destinations, the default command) resolve under /app.
WORKDIR /app

# Copy only the dependency manifest first so this layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .

# Upgrade pip, then install the project requirements without keeping pip's download cache.
RUN pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir -r requirements.txt

# Model-stack libraries, installed after requirements.txt so these pins take effect.
# TODO(review): einops is the only unpinned package here; pin it once the
# intended version is known so builds are reproducible.
RUN pip3 install --no-cache-dir \
        transformers==4.37.2 \
        timm==0.9.11 \
        accelerate==0.30.0 \
        safetensors==0.4.1 \
        einops

# FlashAttention 1.x (constraint: <2.0.0), built without pip's build isolation.
# With --no-build-isolation pip does not provision build requirements, so the
# build helpers (ninja, packaging) are installed by a separate pip invocation
# first; they are then guaranteed to be importable when flash-attn is built.
RUN pip3 install --no-cache-dir \
        ninja \
        packaging \
    && pip3 install --no-cache-dir --no-build-isolation \
        "flash-attn<2.0.0"

# Application script; copied after all dependency layers so editing it does not
# invalidate the (slow) install layers above.
COPY simple_internvit_test.py .

# Generate /entrypoint.sh: print Python/PyTorch/CUDA diagnostics, list the key
# packages, run a small CUDA matmul smoke test, show nvidia-smi output, then
# hand control to the container command via `exec "$@"`.
#
# The script is written with printf '%s\n' (one argument per script line)
# instead of `echo '...\n...'`, so the result does not depend on whether the
# build shell's echo expands backslash escapes.
# Diagnostics are best-effort: there is deliberately no `set -e`, so a failing
# probe never prevents the application from starting.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'echo "Starting GPU diagnostics..."' \
        'echo "===== System Information ====="' \
        'python3 -c "import sys; print(f\"Python version: {sys.version}\")"' \
        'python3 -c "import torch; print(f\"PyTorch version: {torch.__version__}\")"' \
        'echo ""' \
        'echo "===== CUDA Information ====="' \
        'python3 -c "import torch; print(f\"CUDA available: {torch.cuda.is_available()}\")"' \
        'cuda_ok="$(python3 -c "import torch; print(torch.cuda.is_available())")"' \
        'if [ "$cuda_ok" = "True" ]; then' \
        '    python3 -c "import torch; print(f\"CUDA version: {torch.version.cuda}\")"' \
        '    python3 -c "import torch; print(f\"GPU count: {torch.cuda.device_count()}\")"' \
        '    python3 -c "import torch; [print(f\"GPU {i}: {torch.cuda.get_device_name(i)}\") for i in range(torch.cuda.device_count())]"' \
        '    python3 -c "import torch; print(f\"Allocated memory: {torch.cuda.memory_allocated() / 1024 / 1024:.2f} MB\")"' \
        '    python3 -c "import torch; print(f\"Reserved memory: {torch.cuda.memory_reserved() / 1024 / 1024:.2f} MB\")"' \
        'fi' \
        'echo ""' \
        'echo "===== Package Information ====="' \
        'pip3 list | grep -E "transformers|einops|torch|timm|flash|accelerate|safetensors"' \
        'echo ""' \
        'echo "===== Testing Simple CUDA Operation ====="' \
        'if [ "$cuda_ok" = "True" ]; then' \
        '    python3 -c "import torch; a = torch.randn(1000, 1000).cuda(); b = torch.randn(1000, 1000).cuda(); t0 = torch.cuda.Event(enable_timing=True); t1 = torch.cuda.Event(enable_timing=True); t0.record(); c = torch.matmul(a, b); t1.record(); torch.cuda.synchronize(); print(f\"Matrix multiplication completed in {t0.elapsed_time(t1):.2f} ms\")"' \
        'else' \
        '    echo "CUDA not available; skipping CUDA operation test"' \
        'fi' \
        'echo ""' \
        'echo "===== NVIDIA System Information ====="' \
        'if command -v nvidia-smi &> /dev/null; then' \
        '    nvidia-smi' \
        'else' \
        '    echo "nvidia-smi not found"' \
        'fi' \
        'echo ""' \
        'echo "===== Starting Application ====="' \
        'exec "$@"' \
        > /entrypoint.sh \
    && chmod +x /entrypoint.sh

# Documents the port the application is expected to listen on; EXPOSE does not
# publish it -- publishing still requires `-p 7860:7860` (or equivalent) at run time.
EXPOSE 7860

# Exec-form entrypoint: the wrapper script receives CMD (or any `docker run`
# arguments) as "$@".
# NOTE(review): no USER instruction is set in this file, so the container runs
# as the base image's default user -- confirm whether a non-root user is wanted.
ENTRYPOINT ["/entrypoint.sh"]

# Default command; override by passing a different command to `docker run`.
CMD ["python3", "simple_internvit_test.py"]