Spaces:

yusufs
/

llama32-3b-instruct

Paused

yusufs committed 17 days ago

Commit

d968727

verified ·

1 Parent(s): b2bcd0b

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,32 +1,29 @@
 FROM vllm/vllm-openai:v0.10.0
 # e.g. install the `audio` optional dependencies
 # NOTE: Make sure the version of vLLM matches the base image!
 RUN uv pip install --system vllm[audio]==0.10.0
-ENTRYPOINT [
-  "python3",
-  "-m",
-  "vllm.entrypoints.openai.api_server",
-  "meta-llama/Llama-3.2-3B-Instruct"
-]
-# CMD "meta-llama/Llama-3.2-3B-Instruct" \
-#   --task generate \
-#   --revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
-#   --code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
-#   --tokenizer-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
-#   --seed 42 \
-#   --host 0.0.0.0 \
-#   --port 7860 \
-#   --max-num-batched-tokens 32768 \
-#   --max-model-len 32768 \
-#   --dtype float16 \
-#   --enforce-eager \
-#   --gpu-memory-utilization 0.9 \
-#   --enable-prefix-caching \
-#   --disable-log-requests \
-#   --trust-remote-code
 # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
 # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04

 FROM vllm/vllm-openai:v0.10.0
 # e.g. install the `audio` optional dependencies
 # NOTE: Make sure the version of vLLM matches the base image!
 RUN uv pip install --system vllm[audio]==0.10.0
+# Start the vLLM OpenAI-compatible server directly, with no shell in between.
+# ENTRYPOINT and CMD are both in exec (JSON-array) form on purpose: a
+# shell-form CMD is wrapped by Docker as `/bin/sh -c "<command>"` and appended
+# to the ENTRYPOINT as arguments, so pairing it with `ENTRYPOINT ["/bin/bash"]`
+# runs `/bin/bash /bin/sh -c "vllm serve ..."` instead of launching the server.
+# CMD holds only the default arguments, so they can be replaced at
+# `docker run` time without redefining the entrypoint.
+# The model weights, remote code and tokenizer are pinned to the same commit
+# hash so every build serves the same revision.
+# NOTE(review): port 7860 is presumably the port the hosting Space expects;
+# confirm against the Space configuration.
+ENTRYPOINT ["vllm", "serve"]
+CMD ["meta-llama/Llama-3.2-3B-Instruct", \
+  "--task", "generate", \
+  "--revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95", \
+  "--code-revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95", \
+  "--tokenizer-revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95", \
+  "--seed", "42", \
+  "--host", "0.0.0.0", \
+  "--port", "7860", \
+  "--max-num-batched-tokens", "32768", \
+  "--max-model-len", "32768", \
+  "--dtype", "float16", \
+  "--enforce-eager", \
+  "--gpu-memory-utilization", "0.9", \
+  "--enable-prefix-caching", \
+  "--disable-log-requests", \
+  "--trust-remote-code"]
 # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
 # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04