yusufs committed on
Commit
f6a5a01
·
1 Parent(s): 524a82b

fix(--token): remove the --token flag, which does not exist

Browse files
Files changed (1) hide show
  1. Dockerfile +4 -2
Dockerfile CHANGED
@@ -28,14 +28,16 @@ USER myuser
28
  RUN mkdir -p /tmp/.cache/huggingface
29
 
30
  # to be set at runtime from secrets
31
- ENV TOKEN="xxx"
 
 
32
  ENV MODEL_NAME="meta-llama/Llama-3.2-3B-Instruct"
33
  ENV MODEL_REVISION="0cb88a4f764b7a12671c53f0838cd831a0843b95"
34
  ENV HF_HOME="/tmp/.cache/huggingface"
35
 
36
  EXPOSE 7860
37
 
38
- ENTRYPOINT ["/bin/bash", "-c", "vllm serve ${MODEL_NAME} --token ${TOKEN} --task generate --revision ${MODEL_REVISION} --code-revision ${MODEL_REVISION} --tokenizer-revision ${MODEL_NAME} --seed 42 --host 0.0.0.0 --port 7860 --max-num-batched-tokens 32768 --max-model-len 32768 --dtype float16 --enforce-eager --gpu-memory-utilization 0.9 --enable-prefix-caching --disable-log-requests --trust-remote-code"]
39
 
40
  # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
41
  # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04
 
28
  RUN mkdir -p /tmp/.cache/huggingface
29
 
30
  # to be set at runtime from secrets
31
+ ENV TASK="generate"
32
+ ENV MAX_MODEL_LEN=32768
33
+ ENV MAX_NUM_BATCHED_TOKENS=32768
34
  ENV MODEL_NAME="meta-llama/Llama-3.2-3B-Instruct"
35
  ENV MODEL_REVISION="0cb88a4f764b7a12671c53f0838cd831a0843b95"
36
  ENV HF_HOME="/tmp/.cache/huggingface"
37
 
38
  EXPOSE 7860
39
 
40
+ ENTRYPOINT ["/bin/bash", "-c", "vllm serve ${MODEL_NAME} --task ${TASK} --revision ${MODEL_REVISION} --code-revision ${MODEL_REVISION} --tokenizer-revision ${MODEL_REVISION} --seed 42 --host 0.0.0.0 --port 7860 --max-num-batched-tokens ${MAX_NUM_BATCHED_TOKENS} --max-model-len ${MAX_MODEL_LEN} --dtype float16 --enforce-eager --gpu-memory-utilization 0.9 --enable-prefix-caching --disable-log-requests --trust-remote-code"]
41
 
42
  # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
43
  # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04