update readme for fp8 (#1)
Browse files- update readme for fp8 (57b5f94f4c4e7facb6014752d5de02404a1c3858)
README.md
CHANGED
@@ -153,7 +153,7 @@ docker pull hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-vllm
|
|
153 |
|
154 |
- Download Model file:
|
155 |
- Huggingface: will download automatically by vllm.
|
156 |
-
- ModelScope: `modelscope download --model Tencent-Hunyuan/Hunyuan-A13B-Instruct`
|
157 |
|
158 |
|
159 |
- Start the API server:
|
@@ -165,7 +165,7 @@ docker run --privileged --user root --net=host --ipc=host \
|
|
165 |
--gpus=all -it --entrypoint python hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-vllm
|
166 |
\
|
167 |
-m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 \
|
168 |
-
--tensor-parallel-size
|
169 |
|
170 |
```
|
171 |
|
@@ -174,8 +174,9 @@ model downloaded by modelscope:
|
|
174 |
docker run --privileged --user root --net=host --ipc=host \
|
175 |
-v ~/.cache/modelscope:/root/.cache/modelscope \
|
176 |
--gpus=all -it --entrypoint python hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-vllm \
|
177 |
-
-m vllm.entrypoints.openai.api_server --host 0.0.0.0
|
178 |
-
|
|
|
179 |
```
|
180 |
|
181 |
|
@@ -190,7 +191,13 @@ To get started:
|
|
190 |
- Pull the Docker image
|
191 |
|
192 |
```
|
193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
```
|
195 |
|
196 |
- Start the API server:
|
@@ -200,8 +207,8 @@ docker run --gpus all \
|
|
200 |
--shm-size 32g \
|
201 |
-p 30000:30000 \
|
202 |
--ipc=host \
|
203 |
-
|
204 |
-
-m sglang.launch_server --model-path hunyuan/
|
205 |
```
|
206 |
|
207 |
|
|
|
153 |
|
154 |
- Download Model file:
|
155 |
- Huggingface: will download automatically by vllm.
|
156 |
+
- ModelScope: `modelscope download --model Tencent-Hunyuan/Hunyuan-A13B-Instruct-FP8`
|
157 |
|
158 |
|
159 |
- Start the API server:
|
|
|
165 |
--gpus=all -it --entrypoint python hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-vllm
|
166 |
\
|
167 |
-m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 \
|
168 |
+
--tensor-parallel-size 2 --dtype bfloat16 --kv-cache-dtype fp8 --model tencent/Hunyuan-A13B-Instruct-FP8 --trust-remote-code
|
169 |
|
170 |
```
|
171 |
|
|
|
174 |
docker run --privileged --user root --net=host --ipc=host \
|
175 |
-v ~/.cache/modelscope:/root/.cache/modelscope \
|
176 |
--gpus=all -it --entrypoint python hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-vllm \
|
177 |
+
-m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 \
|
178 |
+
--tensor-parallel-size 2 --dtype bfloat16 --kv-cache-dtype fp8 \
|
179 |
+
--model /root/.cache/modelscope/hub/models/Tencent-Hunyuan/Hunyuan-A13B-Instruct-FP8 --trust-remote-code
|
180 |
```
|
181 |
|
182 |
|
|
|
191 |
- Pull the Docker image
|
192 |
|
193 |
```
|
194 |
+
|
195 |
+
# china mirror
|
196 |
+
docker pull docker.cnb.cool/tencent/hunyuan/hunyuan-a13b:hunyuan-moe-A13B-sglang
|
197 |
+
|
198 |
+
# docker hub:
|
199 |
+
docker pull hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-sglang
|
200 |
+
|
201 |
```
|
202 |
|
203 |
- Start the API server:
|
|
|
207 |
--shm-size 32g \
|
208 |
-p 30000:30000 \
|
209 |
--ipc=host \
|
210 |
+
hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-sglang \
|
211 |
+
-m sglang.launch_server --model-path hunyuan/Hunyuan-A13B-Instruct-FP8 --tp 2 --trust-remote-code --host 0.0.0.0 --port 30000
|
212 |
```
|
213 |
|
214 |
|