yusufs committed on
Commit
b2bcd0b
·
verified ·
1 Parent(s): e1a45e0

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +22 -17
Dockerfile CHANGED
@@ -5,23 +5,28 @@ FROM vllm/vllm-openai:v0.10.0
5
  # NOTE: Make sure the version of vLLM matches the base image!
6
  RUN uv pip install --system vllm[audio]==0.10.0
7
 
8
- ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
9
- CMD "meta-llama/Llama-3.2-3B-Instruct" \
10
- --task generate \
11
- --revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
12
- --code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
13
- --tokenizer-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
14
- --seed 42 \
15
- --host 0.0.0.0 \
16
- --port 7860 \
17
- --max-num-batched-tokens 32768 \
18
- --max-model-len 32768 \
19
- --dtype float16 \
20
- --enforce-eager \
21
- --gpu-memory-utilization 0.9 \
22
- --enable-prefix-caching \
23
- --disable-log-requests \
24
- --trust-remote-code
 
 
 
 
 
25
 
26
  # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
27
  # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04
 
5
  # NOTE: Make sure the version of vLLM matches the base image!
6
  RUN uv pip install --system vllm[audio]==0.10.0
7
 
8
+ ENTRYPOINT [ \
9
+ "python3", \
10
+ "-m", \
11
+ "vllm.entrypoints.openai.api_server", \
12
+ "meta-llama/Llama-3.2-3B-Instruct" \
13
+ ]
14
+ # CMD "meta-llama/Llama-3.2-3B-Instruct" \
15
+ # --task generate \
16
+ # --revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
17
+ # --code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
18
+ # --tokenizer-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
19
+ # --seed 42 \
20
+ # --host 0.0.0.0 \
21
+ # --port 7860 \
22
+ # --max-num-batched-tokens 32768 \
23
+ # --max-model-len 32768 \
24
+ # --dtype float16 \
25
+ # --enforce-eager \
26
+ # --gpu-memory-utilization 0.9 \
27
+ # --enable-prefix-caching \
28
+ # --disable-log-requests \
29
+ # --trust-remote-code
30
 
31
  # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
32
  # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04