matthoffner committed
Commit 6520f0e · 1 Parent(s): f7bdab8

Update Dockerfile

Files changed (1)
  1. Dockerfile +17 -36
Dockerfile CHANGED
@@ -1,52 +1,33 @@
  FROM nvidia/cuda:12.0.0-cudnn8-devel-ubuntu22.04

- ENV MODEL_NAME="Phind-CodeLlama-34B-v2-GGUF"
- ENV DEFAULT_MODEL_FILE="phind-codellama-34b-v2.Q2_K.gguf"
- ENV MODEL_USER="TheBloke"
- ENV DEFAULT_MODEL_BRANCH="main"
- ENV MODEL_URL="https://huggingface.co/${MODEL_USER}/${MODEL_NAME}/resolve/${DEFAULT_MODEL_BRANCH}/${DEFAULT_MODEL_FILE}"
- ENV PATH="/usr/local/cuda/bin:$PATH"
-
+ # Install dependencies
  RUN apt update && \
-     apt install --no-install-recommends -y build-essential python3 python3-pip wget curl git && \
+     apt install --no-install-recommends -y build-essential python3 python3-pip wget curl git cmake && \
      apt clean && rm -rf /var/lib/apt/lists/*

  WORKDIR /app

- RUN apt-get install -y wget && \
-     wget -qO- "https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local
-
- RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install git+https://github.com/abetlen/llama-cpp-python --no-cache-dir
-
- COPY requirements.txt ./
-
- RUN pip3 install --upgrade pip && \
-     pip3 install -r requirements.txt
-
- WORKDIR /app
-
- RUN echo ${MODEL_URL} && \
-     wget -O /app/${DEFAULT_MODEL_FILE} ${MODEL_URL}
+ # Download ggml and mmproj models from HuggingFace
+ RUN wget https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/ggml-model-q4_k.gguf -O ggml-model-q4_k.gguf && \
+     wget https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/mmproj-model-f16.gguf -O mmproj-model-f16.gguf

+ # Clone and build llava-server
+ RUN git clone https://github.com/trzy/llava-cpp-server.git llava && \
+     cd llava && \
+     git submodule init && \
+     git submodule update && \
+     make

+ # Create a non-root user for security reasons
  RUN useradd -m -u 1000 user

- RUN mkdir -p /home/user/app && \
-     mv /app/${DEFAULT_MODEL_FILE} /home/user/app
-
- RUN chown -R user:user /home/user/app
-
  USER user
- ENV HOME=/home/user \
-     PATH=/home/user/.local/bin:$PATH \
-     MODEL_NAME=${MODEL_NAME} \
-     MODEL_FILE=/home/user/app/${DEFAULT_MODEL_FILE}
+ ENV HOME=/home/user

  WORKDIR $HOME/app

- COPY --chown=user . .
-
- RUN ls -al
-
- EXPOSE 8000
+ # Expose the port
+ EXPOSE 8080

- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+ # Start the llava-server with models (downloaded and built under /app, so use absolute paths)
+ CMD ["/app/llava/bin/llava-server", "-m", "/app/ggml-model-q4_k.gguf", "--mmproj", "/app/mmproj-model-f16.gguf", "--host", "0.0.0.0", "--port", "8080"]