# NOTE(review): the three lines below are web-viewer extraction residue
# (file size, git blame hashes, line-number gutter) — not Dockerfile content.
# Commented out so the file parses; safe to delete entirely.
# File size: 2,137 Bytes
# e524e3e 76fb5c4 e524e3e 572e6c5 e524e3e 572e6c5 2ae36bd e524e3e 572e6c5 e524e3e 572e6c5 e524e3e 572e6c5 e524e3e 572e6c5 e524e3e 2ae36bd e524e3e 106db30 e524e3e 106db30 e524e3e 106db30 e524e3e 106db30 e524e3e 106db30 c6638aa 106db30 6ebdc50 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# Set arguments for versions.
# These ARGs are declared before any FROM so they can be interpolated into the
# FROM lines below; per Dockerfile ARG scoping they are NOT visible inside the
# stages unless re-declared there.
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=11.7.1
# Dev image (nvcc, compilers, headers) for the build stage.
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Slimmer runtime-only image for the final stage.
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
# Build stage with CUDA development container.
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Install build essentials, git, and Python in a SINGLE layer:
# - update + install together avoids acting on a stale apt index
#   (the original ran a second install-only layer, which breaks once the
#   cached index ages out);
# - --no-install-recommends and removing /var/lib/apt/lists keep the layer small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        git \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Set work directory to /app.
WORKDIR /app

# Copy the application code into the image.
COPY . .

# Create a non-root user 'user' (fixed UID 1000 so runtimes can verify non-root).
RUN useradd -m -u 1000 user

# Switch to the non-root user for any further commands.
USER user

# Set nvcc architecture and enable cuBLAS for llama.cpp compilation.
ENV CUDA_DOCKER_ARCH=all \
    LLAMA_CUBLAS=1
# Runtime stage with CUDA runtime container.
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

# Create the non-root user and install ALL runtime OS packages in one layer.
# The original installed python3 in a later, update-less layer (stale-index
# bug) and bounced between USER root / USER user to do it; merging the
# installs here removes that churn and lets the lists be purged in the same
# layer that created them.
RUN useradd -m -u 1000 user && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libopenblas-dev \
        ninja-build \
        pkg-config \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Switch to the non-root user; everything below runs unprivileged.
USER user

# Set home and path — user-level pip installs land in ~/.local/bin.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set work directory to the user's app directory (created by WORKDIR).
WORKDIR $HOME/app

# Install the server package as the user.
# --no-cache-dir on BOTH installs keeps pip's download cache out of the layer;
# the extras spec is quoted so the shell can never glob the brackets.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
    pip install --no-cache-dir --verbose "llama-cpp-python[server]"

# Download the model to the user's directory.
# -f makes curl exit non-zero on an HTTP error so the build fails instead of
# silently saving an error page as the model file; -L follows redirects.
RUN mkdir -p $HOME/model && \
    curl -fL https://huggingface.co/matthoffner/Magicoder-S-DS-6.7B-GGUF/resolve/main/Magicoder-S-DS-6.7B_Q4_K_M.gguf \
        -o $HOME/model/gguf-model.gguf

# Copy the server entry point, owned by the runtime user.
COPY --chown=user ./main.py $HOME/app/

# Set environment variables for the host/port the server binds to.
ENV HOST=0.0.0.0 \
    PORT=7860

# Expose the server port (documentation only; publish with -p at run time).
EXPOSE ${PORT}

# Run the server (exec form: uvicorn is PID 1 and receives SIGTERM directly).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|