muryshev committed on
Commit
a93707f
·
2 Parent(s): 7c788bd 4692147

Merge branch 'main' of hf.co:spaces/muryshev/llama-cpp-server

Browse files
Files changed (2) hide show
  1. Dockerfile +14 -25
  2. run.sh +2 -0
Dockerfile CHANGED
@@ -1,42 +1,31 @@
1
  ARG UBUNTU_VERSION=22.04
2
- ARG CUDA_VERSION=12.3.1
3
- ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
4
- ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
5
 
6
- FROM ${BASE_CUDA_DEV_CONTAINER} as build
7
-
8
- ARG CUDA_DOCKER_ARCH=all
9
 
10
  RUN apt-get update && \
11
- apt-get install -y build-essential git cmake wget
12
-
13
- WORKDIR /build
14
 
 
15
  RUN git clone https://github.com/ggerganov/llama.cpp.git
16
 
17
- WORKDIR /build/llama.cpp
18
-
19
- ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
20
- ENV LLAMA_CUBLAS=1
21
-
22
- RUN mkdir build && \
23
- cd build && \
24
- # cmake .. #-DLLAMA_CUBLAS=ON && \
25
- cmake .. && \
26
- cmake --build . --config Release
27
 
28
  WORKDIR /data
29
  RUN wget https://huggingface.co/IlyaGusev/saiga2_7b_gguf/resolve/main/model-q8_0.gguf -nv -O model.gguf
30
 
31
- FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
32
-
33
 
34
  WORKDIR /app
35
 
36
- # Copy the executable from the build stage
37
- COPY --from=build /build/llama.cpp/build/bin/server /app
38
  COPY --from=build /data/model.gguf /data/model.gguf
39
- WORKDIR /app
 
 
 
 
 
40
  EXPOSE 7860
41
 
42
- CMD ./server -m /data/model.gguf -c 2048 --port 7860 --host 0.0.0.0
 
1
  ARG UBUNTU_VERSION=22.04
 
 
 
2
 
3
+ FROM ubuntu:$UBUNTU_VERSION as build
 
 
4
 
5
  RUN apt-get update && \
6
+ apt-get install -y build-essential git wget
 
 
7
 
8
+ WORKDIR /app
9
  RUN git clone https://github.com/ggerganov/llama.cpp.git
10
 
11
+ WORKDIR /app/llama.cpp
12
+ RUN make
 
 
 
 
 
 
 
 
13
 
14
  WORKDIR /data
15
  RUN wget https://huggingface.co/IlyaGusev/saiga2_7b_gguf/resolve/main/model-q8_0.gguf -nv -O model.gguf
16
 
17
+ FROM ubuntu:$UBUNTU_VERSION as runtime
 
18
 
19
  WORKDIR /app
20
 
21
+ COPY --from=build /app/llama.cpp/server /app
 
22
  COPY --from=build /data/model.gguf /data/model.gguf
23
+ COPY ./run.sh /app/run.sh
24
+
25
+ RUN chmod +x run.sh
26
+
27
+ ENV LC_ALL=C.utf8
28
+
29
  EXPOSE 7860
30
 
31
+ CMD ./run.sh
run.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ #!/bin/bash
2
+ /app/server -m /data/model.gguf -c 4096 --port 7860 --host 0.0.0.0