Spaces:

rgerganov
/

llama-rpc-server

Paused

rgerganov committed on Aug 15, 2024

Commit

ecd441a

1 Parent(s): 3c0068d

update

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,38 +1,14 @@
 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
-ARG CUDA_VERSION=11.7.1
-# Target the CUDA build image
-ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
-FROM ${BASE_CUDA_DEV_CONTAINER} AS build
-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
 RUN apt-get update && \
-    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
-#COPY requirements.txt   requirements.txt
-#COPY requirements       requirements
-#
-#RUN pip install --upgrade pip setuptools wheel \
-#    && pip install -r requirements.txt
-RUN git clone https://github.com/ggerganov/llama.cpp /app
-WORKDIR /app
-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable CUDA
-ENV GGML_CUDA=1
-# Enable cURL
-ENV LLAMA_CURL=1
-# Enable RPC
-ENV GGML_RPC=1
-RUN make -j$(nproc)
-CMD ["/app/rpc-server", "-p", "7860", "-H", "0.0.0.0"]

 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
+ARG CUDA_VERSION=12.2.0
+# Target the CUDA runtime image
+ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 RUN apt-get update && \
+    apt-get install -y libgomp1
+COPY rpc-server /rpc-server
+ENTRYPOINT ["/rpc-server", "-p", "7860", "-H", "0.0.0.0"]