matthoffner committed on
Commit be1949d · verified · 1 Parent(s): 707c746

Update Dockerfile

Files changed (1)
  1. Dockerfile +45 -36
Dockerfile CHANGED
@@ -1,51 +1,60 @@
- FROM ubuntu:22.04

- ENV DEBIAN_FRONTEND=noninteractive

- # Update and install necessary dependencies
  RUN apt update && \
-     apt install --no-install-recommends -y \
-     build-essential \
-     python3 \
-     python3-pip \
-     wget \
-     curl \
-     git \
-     cmake \
-     zlib1g-dev \
-     libblas-dev && \
-     apt clean && \
-     rm -rf /var/lib/apt/lists/*

  WORKDIR /app

- # Download ggml and mmproj models from HuggingFace
- RUN wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q4_k.gguf && \
-     wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf

- # Clone and build llava-server with CUDA support
- RUN git clone https://github.com/ggerganov/llama.cpp.git && \
-     cd llama.cpp && \
-     git submodule init && \
-     git submodule update && \
-     make

- # Create a non-root user for security reasons
- RUN useradd -m -u 1000 user && \
-     mkdir -p /home/user/app && \
-     cp /app/ggml-model-q4_k.gguf /home/user/app && \
-     cp /app/mmproj-model-f16.gguf /home/user/app

- RUN chown user:user /home/user/app/ggml-model-q4_k.gguf && \
-     chown user:user /home/user/app/mmproj-model-f16.gguf

  USER user
- ENV HOME=/home/user

  WORKDIR $HOME/app

- # Expose the port
- EXPOSE 8080

- # Start the llava-server with models
- CMD ["/app/llama.cpp/server", "--model", "ggml-model-q4_k.gguf", "--mmproj", "mmproj-model-f16.gguf", "--host", "0.0.0.0", "--threads", "10"]
 
+ FROM nvidia/cuda:11.0.3-base-ubuntu20.04

+ ENV MODEL_NAME="Nous-Hermes-13B-GGUF"
+ ENV DEFAULT_MODEL_FILE="Nous-Hermes-13B.Q4_K_M.gguf"
+ ENV MODEL_USER="TheBloke"
+ ENV DEFAULT_MODEL_BRANCH="main"
+ ENV MODEL_URL="https://huggingface.co/${MODEL_USER}/${MODEL_NAME}/resolve/${DEFAULT_MODEL_BRANCH}/${DEFAULT_MODEL_FILE}"
+ ENV PATH="/usr/local/cuda/bin:$PATH"

  RUN apt update && \
+     apt install --no-install-recommends -y build-essential python3 python3-pip wget curl git && \
+     apt clean && rm -rf /var/lib/apt/lists/*

  WORKDIR /app

+ RUN apt-get install -y wget && \
+     wget -qO- "https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local

+ COPY requirements.txt ./

+ RUN pip3 install --upgrade pip && \
+     pip3 install -r requirements.txt

+ # Cloning the llama-cpp-python repository
+ #RUN git clone https://github.com/abetlen/llama-cpp-python.git
+ #WORKDIR llama-cpp-python
+ # Initializing and updating the llama.cpp submodule
+ #RUN git submodule init && git submodule update
+
+ # Building the llama-cpp-python
+ # RUN FORCE_CMAKE=1 pip install .
+
+ # Moving back to /app directory
+ WORKDIR /app
+
+ RUN echo ${MODEL_URL} && \
+     wget -O /app/${DEFAULT_MODEL_FILE} ${MODEL_URL}
+
+ RUN useradd -m -u 1000 user
+
+ RUN mkdir -p /home/user/app && \
+     mv /app/${DEFAULT_MODEL_FILE} /home/user/app
+
+ RUN chown -R user:user /home/user/app

  USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
+     MODEL_NAME=${MODEL_NAME} \
+     MODEL_FILE=/home/user/app/${DEFAULT_MODEL_FILE}

  WORKDIR $HOME/app

+ COPY --chown=user . .
+
+ RUN ls -al
+
+ EXPOSE 8000

+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]