# Image that serves a Hugging Face model through the vLLM HTTP server on
# port 7860, running as an unprivileged user with a Miniconda Python 3.9
# environment.
FROM nvidia/cuda:12.5.1-cudnn-devel-ubuntu20.04

# Make every RUN step fail when any stage of a pipeline fails; the default
# /bin/sh would hide a failed download in `curl ... | bash`.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Build-time only: stop apt/debconf from prompting. Declared as ARG so the
# setting does not persist into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Timezone used inside the container.
ENV TZ=Europe/Paris

# Remove third-party apt sources to avoid failures caused by expiring keys,
# then install basic utilities. The package index is refreshed and removed
# again inside the same layer so it never ends up in the image.
RUN rm -f /etc/apt/sources.list.d/*.list && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        bzip2 \
        ca-certificates \
        curl \
        git \
        git-lfs \
        htop \
        libsndfile-dev \
        libx11-6 \
        nano \
        procps \
        software-properties-common \
        sudo \
        unzip \
        vim \
        wget \
        zip && \
    rm -rf /var/lib/apt/lists/*

# Install NVTOP for GPU monitoring from its PPA. The index is refreshed
# explicitly because the previous layer deleted it; no blanket
# `apt-get upgrade` is run, so base-image packages keep the versions that
# shipped with the pinned base tag.
RUN add-apt-repository -y ppa:flexiondotorg/nvtop && \
    apt-get update && \
    apt-get install -y --no-install-recommends nvtop && \
    rm -rf /var/lib/apt/lists/*

# Install Node.js and configurable-http-proxy. `-f` makes curl exit non-zero
# on an HTTP error instead of piping an error page into bash.
RUN curl -fsSL https://deb.nodesource.com/setup_21.x | bash - && \
    apt-get install -y --no-install-recommends nodejs && \
    npm install -g configurable-http-proxy && \
    npm cache clean --force && \
    rm -rf /var/lib/apt/lists/*

# Create a working directory
WORKDIR /app

# Create a non-root user, hand /app over to it, and allow passwordless sudo.
# The sudoers drop-in is made read-only (0440), the mode sudo expects.
RUN adduser --disabled-password --gecos '' --shell /bin/bash user && \
    chown -R user:user /app && \
    echo "user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-user && \
    chmod 0440 /etc/sudoers.d/90-user
USER user

# Set the home directory and create the cache, config and application
# directories as `user`, so they are owned by and writable for that user
# without opening the whole home directory to everyone.
ENV HOME=/home/user
RUN mkdir -p "$HOME/.cache" "$HOME/.config" "$HOME/app"

# Set up the Conda environment
ENV CONDA_AUTO_UPDATE_CONDA=false \
    PATH=$HOME/miniconda/bin:$PATH
RUN curl -fsSLo ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b -p ~/miniconda && \
    rm ~/miniconda.sh && \
    conda clean -ya

# Install Python build and numeric dependencies with conda. torch and
# torchvision are intentionally absent: they arrive as pinned pip
# dependencies of vLLM below, and a conda copy would be overwritten in the
# same site-packages.
RUN conda install -y \
        python=3.9 \
        cmake \
        git-lfs \
        ninja \
        numba \
        numpy \
        packaging \
        pip \
        scipy \
        setuptools-scm \
        wheel && \
    conda clean -ya

# Install vLLM and the Hugging Face CLI. No separate torch install is done:
# vLLM pins the exact torch build it needs and pip installs it as a
# dependency. The extras specifier is quoted so the shell cannot treat the
# brackets as a glob pattern.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir vllm==0.6.2 && \
    pip install --no-cache-dir "huggingface-hub[cli]"

# Port the model server listens on (documentation only; publish it at run time)
EXPOSE 7860

# Run from the application directory inside the user's home
WORKDIR $HOME/app

# Start the model server (replace the model id with your own).
# NOTE(review): `--device cpu` is kept from the original command, but the
# vLLM wheel from PyPI is presumably built for CUDA - confirm that CPU serving
# actually works with this wheel, or drop the flag to use the GPU.
CMD ["vllm", "serve", "Hjgugugjhuhjggg/mergekit-ties-tzamfyy", "--device", "cpu", "--port", "7860", "--max-model-len", "10000"]