llama-cpp-server

Paused

App Files Files Community

matthoffner committed on Feb 12, 2024

Commit

e524e3e

verified ·

1 Parent(s): 4dc075b

Update Dockerfile

Browse files

Files changed (1) hide show

Dockerfile +44 -30

Dockerfile CHANGED Viewed

@@ -1,59 +1,73 @@
 ARG UBUNTU_VERSION=22.04
-# This needs to generally match the container host's environment.
 ARG CUDA_VERSION=11.7.1
-# Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
-# Target the CUDA runtime image
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
 FROM ${BASE_CUDA_DEV_CONTAINER} as build
-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
 RUN apt-get update && \
     apt-get install -y build-essential git
 # Install Python3 and pip
 RUN apt-get install -y python3 python3-pip
 WORKDIR /app
 COPY . .
-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV LLAMA_CUBLAS=1
 FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
-# Install build and runtime dependencies
-RUN apt-get update && \
-    apt-get install -y \
-    libopenblas-dev \
-    ninja-build \
-    build-essential \
-    pkg-config \
-    curl
-# Install Python3 and pip for the runtime container as well
 RUN apt-get install -y python3 python3-pip
-RUN pip3 install -U pip setuptools wheel && \
-  pip3 install --verbose llama-cpp-python[server]
-# Download model
-RUN mkdir model && \
-    curl -L https://huggingface.co/matthoffner/Magicoder-S-DS-6.7B-GGUF/resolve/main/Magicoder-S-DS-6.7B_Q4_K_M.gguf -o model/gguf-model.gguf
-COPY ./main.py ./
-# Set environment variable for the host
-ENV HOST=0.0.0.0
-ENV PORT=7860
-# Expose a port for the server
 EXPOSE ${PORT}
 # Run the server start script

+# Versions used to pick the CUDA base images; override with --build-arg.
 ARG UBUNTU_VERSION=22.04
 ARG CUDA_VERSION=11.7.1
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+# Build stage with CUDA development container
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
+# Install build essentials, git, Python3 and pip in ONE layer. Keeping
+# `apt-get update` and every `apt-get install` together prevents a cached,
+# stale package index from being reused by a later install, and removing the
+# package lists in the same layer keeps them out of the image.
+RUN apt-get update && \
+    apt-get install -y build-essential git python3 python3-pip && \
+    rm -rf /var/lib/apt/lists/*
+# Set work directory to /app
 WORKDIR /app
+# Copy your application code to the container
 COPY . .
+# Create a non-root user 'user' in the build stage as well
+RUN useradd -m -u 1000 user
+# Switch to the non-root user for any further commands
+USER user
+# Set nvcc architecture and enable cuBLAS.
+# NOTE(review): ENV is scoped to this stage and is not inherited by the
+# runtime stage, which is where `pip install llama-cpp-python[server]` runs.
+# Confirm whether these variables are meant to influence that install.
+ENV CUDA_DOCKER_ARCH=all \
+    LLAMA_CUBLAS=1
+# Runtime stage with CUDA runtime container
+FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
+# Do all root-only work in a single layer: re-create the non-root user 'user'
+# and install every OS package (including Python3 and pip). Keeping
+# `apt-get update` and `apt-get install` together avoids installing from a
+# stale cached package index, and removes the need to switch back to root
+# later. The package lists are removed in the same layer to keep it small.
+RUN useradd -m -u 1000 user && \
+    apt-get update && \
+    apt-get install -y \
+        build-essential \
+        curl \
+        libopenblas-dev \
+        ninja-build \
+        pkg-config \
+        python3 \
+        python3-pip && \
+    rm -rf /var/lib/apt/lists/*
+# Switch to non-root user; everything below runs unprivileged
+USER user
+# Set home and path for the user (pip's per-user installs land in ~/.local/bin)
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Set work directory to the app folder inside the user's home directory
+WORKDIR $HOME/app
+# Install Python packages as the non-root user.
+# The requirement is quoted so the shell cannot treat "[server]" as a glob.
+# NOTE(review): CUDA_DOCKER_ARCH / LLAMA_CUBLAS are set with ENV in the build
+# stage only and are not visible here; confirm whether a cuBLAS-enabled build
+# of llama-cpp-python is intended.
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
+    pip install --no-cache-dir --verbose "llama-cpp-python[server]"
+# Download the model to the user's directory.
+# --fail makes curl exit non-zero on an HTTP error instead of silently saving
+# the error page as the model file.
+RUN mkdir -p "$HOME/model" && \
+    curl --fail --location https://huggingface.co/matthoffner/Magicoder-S-DS-6.7B-GGUF/resolve/main/Magicoder-S-DS-6.7B_Q4_K_M.gguf -o "$HOME/model/gguf-model.gguf"
+COPY --chown=user ./main.py $HOME/app/
+# Set environment variables for the host
+ENV HOST=0.0.0.0 \
+    PORT=7860
+# Expose the server port
 EXPOSE ${PORT}
 # Run the server start script