bsmit1659 committed on
Commit
dc22273
·
1 Parent(s): 4427815

Add application file

Browse files
Files changed (1) hide show
  1. Dockerfile +97 -0
Dockerfile ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# syntax=docker/dockerfile:1
# The vLLM Dockerfile is used to construct vLLM image that can be directly used
# to run the OpenAI compatible server.

#################### BASE BUILD IMAGE ####################
# Heavy devel image: carries the full CUDA toolchain needed to compile the
# CUDA/C++ extensions. Later stages build FROM it or copy artifacts out of it.
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev

# --no-install-recommends keeps the layer lean; removing the apt lists in the
# same layer that created them stops the package index from being baked into
# the image (hadolint DL3009/DL3015).
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        git \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# install build and runtime dependencies
# (the requirements file is copied alone so this expensive layer stays cached
# until requirements.txt itself changes)
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt

# install development dependencies
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-dev.txt
#################### BASE BUILD IMAGE ####################
22
+
23
+
24
#################### EXTENSION BUILD IMAGE ####################
# Compiles the CUDA/C++ extensions in-place; the resulting vllm/*.so files are
# the only artifacts the later stages copy out of this stage.
FROM dev AS build

# install build dependencies
COPY requirements-build.txt requirements-build.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-build.txt

# copy only the inputs setup.py needs, so unrelated source edits do not
# invalidate this expensive compile layer
COPY csrc csrc
COPY setup.py setup.py
COPY requirements.txt requirements.txt
COPY pyproject.toml pyproject.toml
COPY vllm/__init__.py vllm/__init__.py

# cuda arch list used by torch
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=${nvcc_threads}
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1

RUN python3 setup.py build_ext --inplace
#################### EXTENSION BUILD IMAGE ####################
53
+
54
+
55
#################### TEST IMAGE ####################
# image to run unit testing suite
FROM dev AS test

# copy pytorch extensions separately to avoid having to rebuild
# when python code changes
WORKDIR /vllm-workspace
# COPY preserves directory structure; ADD adds nothing for plain local
# directories and is discouraged for simple copies (hadolint DL3020)
COPY . /vllm-workspace/
COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
# drop pyproject.toml so pip does not pull in the build-system requirements —
# we are installing with pre-compiled extensions
RUN rm pyproject.toml
RUN --mount=type=cache,target=/root/.cache/pip VLLM_USE_PRECOMPILED=1 pip install . --verbose
#################### TEST IMAGE ####################
69
+
70
+
71
#################### RUNTIME BASE IMAGE ####################
# use CUDA base (not devel) as CUDA runtime dependencies are already installed
# via pip — this keeps the shipped image far smaller than the build stages
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS vllm-base

# NOTE(review): the original comment claimed "libnccl required for ray", but
# only python3-pip is installed here — presumably NCCL arrives via the
# pip-installed wheels below; confirm before relying on that.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends python3-pip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt
#################### RUNTIME BASE IMAGE ####################
84
+
85
+
86
#################### OPENAI API SERVER ####################
# openai api server alternative
FROM vllm-base AS vllm-openai
# install additional dependencies for openai api server
# NOTE(review): accelerate is unpinned (hadolint DL3013) — pin a known-good
# version here for reproducible builds
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install accelerate

# bring in the pre-built extensions and the python package source
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY vllm vllm

# exec-form entrypoint so the server runs as PID 1 and receives SIGTERM
# directly from `docker stop`
# NOTE(review): no USER directive — the server runs as root; consider adding a
# non-root user if the runtime permits it
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
#################### OPENAI API SERVER ####################