# NOTE(review): the three lines below are web-viewer extraction residue
# (file size, git blame hashes, line-number gutter) — not Dockerfile content.
# Commented out so the file parses; safe to delete entirely.
# File size: 2,137 Bytes
# e524e3e 76fb5c4 e524e3e 572e6c5 e524e3e 572e6c5 2ae36bd e524e3e 572e6c5 e524e3e 572e6c5 e524e3e 572e6c5 e524e3e 572e6c5 e524e3e 2ae36bd e524e3e 106db30 e524e3e 106db30 e524e3e 106db30 e524e3e 106db30 e524e3e 106db30 c6638aa 106db30 6ebdc50 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# Set arguments for versions.
# These ARGs are declared before any FROM so they can be interpolated into the
# FROM lines below; per Dockerfile ARG scoping they are NOT visible inside the
# stages unless re-declared there.
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=11.7.1
# Dev image (nvcc, compilers, headers) for the build stage.
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Slimmer runtime-only image for the final stage.
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
# Build stage with CUDA development container.
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Install build essentials, git, and Python in a SINGLE layer:
# - update + install together avoids acting on a stale apt index
#   (the original ran a second install-only layer, which breaks once the
#   cached index ages out);
# - --no-install-recommends and removing /var/lib/apt/lists keep the layer small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        git \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Set work directory to /app.
WORKDIR /app

# Copy the application code into the image.
COPY . .

# Create a non-root user 'user' (fixed UID 1000 so runtimes can verify non-root).
RUN useradd -m -u 1000 user

# Switch to the non-root user for any further commands.
USER user

# Set nvcc architecture and enable cuBLAS for llama.cpp compilation.
ENV CUDA_DOCKER_ARCH=all \
    LLAMA_CUBLAS=1
# Runtime stage with CUDA runtime container.
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

# Create the non-root user and install ALL runtime OS packages in one layer.
# The original installed python3 in a later, update-less layer (stale-index
# bug) and bounced between USER root / USER user to do it; merging the
# installs here removes that churn and lets the lists be purged in the same
# layer that created them.
RUN useradd -m -u 1000 user && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libopenblas-dev \
        ninja-build \
        pkg-config \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Switch to the non-root user; everything below runs unprivileged.
USER user

# Set home and path — user-level pip installs land in ~/.local/bin.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set work directory to the user's app directory (created by WORKDIR).
WORKDIR $HOME/app

# Install the server package as the user.
# --no-cache-dir on BOTH installs keeps pip's download cache out of the layer;
# the extras spec is quoted so the shell can never glob the brackets.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
    pip install --no-cache-dir --verbose "llama-cpp-python[server]"

# Download the model to the user's directory.
# -f makes curl exit non-zero on an HTTP error so the build fails instead of
# silently saving an error page as the model file; -L follows redirects.
RUN mkdir -p $HOME/model && \
    curl -fL https://huggingface.co/matthoffner/Magicoder-S-DS-6.7B-GGUF/resolve/main/Magicoder-S-DS-6.7B_Q4_K_M.gguf \
        -o $HOME/model/gguf-model.gguf

# Copy the server entry point, owned by the runtime user.
COPY --chown=user ./main.py $HOME/app/

# Set environment variables for the host/port the server binds to.
ENV HOST=0.0.0.0 \
    PORT=7860

# Expose the server port (documentation only; publish with -p at run time).
EXPOSE ${PORT}

# Run the server (exec form: uvicorn is PID 1 and receives SIGTERM directly).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|