Spaces:
Paused
Paused
File size: 1,706 Bytes
be1949d 3e50743 9a57e00 be1949d e5bf1d7 be1949d e5bf1d7 be1949d 7131f6c 6cba908 be1949d 998196e 636bd97 be1949d 6db6156 be1949d 6db6156 396d41c be1949d e5bf1d7 8235514 be1949d c111476 4133835 be1949d 79d7c2e 6db6156 be1949d 6db6156 be1949d 6db6156 e5f75b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# Base image: the CUDA 11.0.3 "base" variant on Ubuntu 20.04.
FROM nvidia/cuda:11.0.3-base-ubuntu20.04

# Coordinates of the model files on huggingface.co: repository name, owner,
# branch, the main GGUF weights file and the companion CLIP projector file.
ENV MODEL_NAME="llava-1.6-mistral-7b-gguf" \
    DEFAULT_MODEL_FILE="llava-v1.6-mistral-7b.Q3_K_XS.gguf" \
    MODEL_USER="cjpais" \
    DEFAULT_MODEL_BRANCH="main" \
    DEFAULT_CLIP_MODEL_FILE="mmproj-model-f16.gguf"

# Deliberately a separate ENV instruction: ${...} substitution only sees
# variables defined by earlier instructions, not ones set in the same ENV.
# PATH additionally gets the CUDA bin directory prepended.
ENV MODEL_URL="https://huggingface.co/${MODEL_USER}/${MODEL_NAME}/resolve/${DEFAULT_MODEL_BRANCH}/${DEFAULT_MODEL_FILE}" \
    CLIP_MODEL_URL="https://huggingface.co/${MODEL_USER}/${MODEL_NAME}/resolve/${DEFAULT_MODEL_BRANCH}/${DEFAULT_CLIP_MODEL_FILE}" \
    PATH="/usr/local/cuda/bin:$PATH"
# OS-level tooling used by the rest of the build: a C/C++ toolchain
# (build-essential), Python 3 with pip, and download/VCS utilities.
# apt-get is used rather than apt because apt's command-line interface is not
# guaranteed to be stable for scripted use. DEBIAN_FRONTEND is set inline so
# no package can block the unattended build with a configuration prompt, and
# it does not leak into the runtime environment.
# update, install and cleanup share one layer so the package lists are never
# persisted in the image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        build-essential \
        curl \
        git \
        python3 \
        python3-pip \
        wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Relative paths in the following instructions resolve against /app.
WORKDIR /app

# Install CMake 3.18.0 from the upstream binary tarball into /usr/local.
# wget is already installed by the apt step earlier in this file, so it is not
# reinstalled here (the package lists have been removed by then anyway).
# The archive is saved to a temporary file instead of being piped into tar:
# the default /bin/sh has no pipefail, so a failed download on the left side
# of a pipe would not by itself fail the step. The tarball is deleted in the
# same layer so it adds nothing to the image.
RUN wget -q -O /tmp/cmake.tar.gz "https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.tar.gz" && \
    tar --strip-components=1 -xzf /tmp/cmake.tar.gz -C /usr/local && \
    rm -f /tmp/cmake.tar.gz
# Copy only the dependency manifest before installing, so this layer is
# invalidated by changes to requirements.txt and not by other source edits.
COPY requirements.txt ./

# Upgrade pip, then install the Python dependencies.
# "python3 -m pip" guarantees the pip that was just upgraded is the one used
# for the second command, regardless of which "pip" script is first on PATH.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir -r requirements.txt
# Unprivileged account (UID 1000) that owns the application directory;
# -m creates its home directory /home/user.
RUN useradd -m -u 1000 user

# Fetch the model weights and the CLIP projector directly into their final
# location and hand the directory to the unprivileged user, all in ONE layer.
# Downloading in one layer and then moving / chown-ing in later layers would
# store the multi-gigabyte files again in each of those layers, because every
# layer records the full content of any file it touches.
# The URLs are echoed first so they show up in the build log.
RUN mkdir -p /home/user/app && \
    echo "${MODEL_URL}" && \
    wget -O "/home/user/app/${DEFAULT_MODEL_FILE}" "${MODEL_URL}" && \
    echo "${CLIP_MODEL_URL}" && \
    wget -O "/home/user/app/${DEFAULT_CLIP_MODEL_FILE}" "${CLIP_MODEL_URL}" && \
    chown -R user:user /home/user/app
# From here on, build steps and the container process run as the
# unprivileged "user" account instead of root.
USER user

# Runtime environment, one variable per instruction:
#   HOME        - home directory of the runtime account
#   PATH        - prepends the per-user bin directory under /home/user/.local
#   MODEL_NAME  - re-declared with the value it already holds
#   MODEL_FILE  - absolute path of the downloaded model weights
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH
ENV MODEL_NAME=${MODEL_NAME}
ENV MODEL_FILE=/home/user/app/${DEFAULT_MODEL_FILE}

# Work inside the application directory under the user's home.
WORKDIR ${HOME}/app
# Documents the port the container is expected to serve on; EXPOSE does not
# publish the port by itself.
EXPOSE 8000

# Copy the whole build context into the working directory, owned by "user".
COPY --chown=user . ./

# Print the directory contents into the build log.
RUN ls -la

# Start the llama_cpp.server module with the settings in config.toml, which
# is resolved relative to the working directory. Exec form: no shell wrapper.
CMD ["python3", "-m", "llama_cpp.server", "--config_file", "config.toml"]
|