Spaces:
Sleeping
Sleeping
Add application file
Browse files- Dockerfile +97 -0
Dockerfile
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# syntax=docker/dockerfile:1
# The vLLM Dockerfile is used to construct vLLM image that can be directly used
# to run the OpenAI compatible server.
# NOTE(review): the syntax directive above must be the first line of the file
# for BuildKit to honor it — the `RUN --mount` instructions below require it.

#################### BASE BUILD IMAGE ####################
# Shared dev stage: CUDA devel toolchain plus Python build/runtime/dev deps.
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev

# Combine update+install in one layer (avoids the stale apt-cache bug) and
# drop the package lists so they don't persist in the image (DL3009/DL3015).
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        git \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# install build and runtime dependencies
# The cache mount keeps pip's wheel cache on the build host, speeding up
# rebuilds without bloating the layer.
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt

# install development dependencies
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-dev.txt
#################### BASE BUILD IMAGE ####################
|
22 |
+
|
23 |
+
|
24 |
+
#################### EXTENSION BUILD IMAGE ####################
# Compiles the CUDA extensions (.so files) on top of the dev stage; only the
# built artifacts are copied out of this stage by later stages.
FROM dev AS build

# install build dependencies
COPY requirements-build.txt requirements-build.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-build.txt

# copy input files
# Only the files setup.py needs are copied, so unrelated source changes do
# not invalidate the (expensive) extension-build layer below.
COPY csrc csrc
COPY setup.py setup.py
COPY requirements.txt requirements.txt
COPY pyproject.toml pyproject.toml
COPY vllm/__init__.py vllm/__init__.py

# cuda arch list used by torch
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=${nvcc_threads}
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1

RUN python3 setup.py build_ext --inplace
#################### EXTENSION BUILD IMAGE ####################
|
53 |
+
|
54 |
+
|
55 |
+
#################### TEST IMAGE ####################
# image to run unit testing suite
FROM dev AS test

# copy pytorch extensions separately to avoid having to rebuild
# when python code changes
WORKDIR /vllm-workspace
# COPY preserves directory structure just like ADD, without ADD's extra
# archive-extraction/URL semantics — prefer COPY for plain files (DL3020).
COPY . /vllm-workspace/
COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
# ignore build dependencies installation because we are using pre-compiled extensions
RUN rm pyproject.toml
RUN --mount=type=cache,target=/root/.cache/pip VLLM_USE_PRECOMPILED=1 pip install . --verbose
#################### TEST IMAGE ####################
|
69 |
+
|
70 |
+
|
71 |
+
#################### RUNTIME BASE IMAGE ####################
# use CUDA base as CUDA runtime dependencies are already installed via pip
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS vllm-base

# libnccl required for ray
# NOTE(review): only python3-pip is installed here — presumably libnccl
# arrives via the pip requirements below; confirm against requirements.txt.
# update+install combined in one layer with cleanup (DL3009/DL3015).
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends python3-pip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt
#################### RUNTIME BASE IMAGE ####################
|
84 |
+
|
85 |
+
|
86 |
+
#################### OPENAI API SERVER ####################
# openai api server alternative
FROM vllm-base AS vllm-openai
# install additional dependencies for openai api server
# NOTE(review): `accelerate` is unpinned — consider pinning a version for
# reproducible builds (hadolint DL3013).
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install accelerate

# Reuse the extensions compiled in the build stage, then layer the Python
# sources on top, so this stage never needs the CUDA devel toolchain.
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY vllm vllm

# Exec-form ENTRYPOINT: the server runs as PID 1 and receives SIGTERM directly.
# NOTE(review): no USER directive — the server runs as root; consider adding
# a non-root user before the ENTRYPOINT.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
#################### OPENAI API SERVER ####################
|