rgerganov committed on
Commit
ecd441a
·
1 Parent(s): 3c0068d
Files changed (1) hide show
  1. Dockerfile +7 -31
Dockerfile CHANGED
@@ -1,38 +1,14 @@
1
  ARG UBUNTU_VERSION=22.04
2
-
3
  # This needs to generally match the container host's environment.
4
- ARG CUDA_VERSION=11.7.1
5
-
6
- # Target the CUDA build image
7
- ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
8
 
9
- FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10
-
11
- # Unless otherwise specified, we make a fat build.
12
- ARG CUDA_DOCKER_ARCH=all
13
 
14
  RUN apt-get update && \
15
- apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
16
-
17
- #COPY requirements.txt requirements.txt
18
- #COPY requirements requirements
19
- #
20
- #RUN pip install --upgrade pip setuptools wheel \
21
- # && pip install -r requirements.txt
22
-
23
- RUN git clone https://github.com/ggerganov/llama.cpp /app
24
-
25
- WORKDIR /app
26
-
27
- # Set nvcc architecture
28
- ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
29
- # Enable CUDA
30
- ENV GGML_CUDA=1
31
- # Enable cURL
32
- ENV LLAMA_CURL=1
33
- # Enable RPC
34
- ENV GGML_RPC=1
35
 
36
- RUN make -j$(nproc)
37
 
38
- CMD ["/app/rpc-server", "-p", "7860", "-H", "0.0.0.0"]
 
1
  ARG UBUNTU_VERSION=22.04
 
2
  # This needs to generally match the container host's environment.
3
+ ARG CUDA_VERSION=12.2.0
4
+ # Target the CUDA runtime image
5
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
 
6
 
7
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 
 
 
8
 
9
  RUN apt-get update && \
10
+ apt-get install -y libgomp1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ COPY rpc-server /rpc-server
13
 
14
+ ENTRYPOINT ["/rpc-server", "-p", "7860", "-H", "0.0.0.0"]