yusufs committed on
Commit
cdf13a3
·
verified ·
1 Parent(s): ed90f80

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +1 -18
Dockerfile CHANGED
@@ -6,24 +6,7 @@ FROM vllm/vllm-openai:v0.10.0
6
  # NOTE: Make sure the version of vLLM matches the base image!
7
  RUN uv pip install --system vllm[audio]==0.10.0
8
 
9
- ENTRYPOINT ["/bin/bash", "-c"]
10
-
11
- CMD vllm serve "meta-llama/Llama-3.2-3B-Instruct" \
12
- --task generate \
13
- --revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
14
- --code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
15
- --tokenizer-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
16
- --seed 42 \
17
- --host 0.0.0.0 \
18
- --port 7860 \
19
- --max-num-batched-tokens 32768 \
20
- --max-model-len 32768 \
21
- --dtype float16 \
22
- --enforce-eager \
23
- --gpu-memory-utilization 0.9 \
24
- --enable-prefix-caching \
25
- --disable-log-requests \
26
- --trust-remote-code
27
 
28
  # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
29
  # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04
 
6
  # NOTE: Make sure the version of vLLM matches the base image!
7
  RUN uv pip install --system vllm[audio]==0.10.0
8
 
9
+ ENTRYPOINT ["/bin/bash", "-c", "vllm serve meta-llama/Llama-3.2-3B-Instruct --task generate --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --code-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --tokenizer-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --seed 42 --host 0.0.0.0 --port 7860 --max-num-batched-tokens 32768 --max-model-len 32768 --dtype float16 --enforce-eager --gpu-memory-utilization 0.9 --enable-prefix-caching --disable-log-requests --trust-remote-code"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
12
  # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04