manaestras committed on
Commit
51ff041
·
verified ·
1 Parent(s): 2645d0f

update readme for fp8 (#1)

Browse files

- update readme for fp8 (57b5f94f4c4e7facb6014752d5de02404a1c3858)

Files changed (1) hide show
  1. README.md +14 -7
README.md CHANGED
@@ -153,7 +153,7 @@ docker pull hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-vllm
153
 
154
  - Download Model file:
155
 - Huggingface: will be downloaded automatically by vLLM.
156
- - ModelScope: `modelscope download --model Tencent-Hunyuan/Hunyuan-A13B-Instruct`
157
 
158
 
159
  - Start the API server:
@@ -165,7 +165,7 @@ docker run --privileged --user root --net=host --ipc=host \
165
  --gpus=all -it --entrypoint python hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-vllm
166
  \
167
  -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 \
168
- --tensor-parallel-size 4 --model tencent/Hunyuan-A13B-Instruct --trust-remote-code
169
 
170
  ```
171
 
@@ -174,8 +174,9 @@ model downloaded by modelscope:
174
  docker run --privileged --user root --net=host --ipc=host \
175
  -v ~/.cache/modelscope:/root/.cache/modelscope \
176
  --gpus=all -it --entrypoint python hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-vllm \
177
- -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --tensor-parallel-size 4 --port 8000 \
178
- --model /root/.cache/modelscope/hub/models/Tencent-Hunyuan/Hunyuan-A13B-Instruct/ --trust_remote_code
 
179
  ```
180
 
181
 
@@ -190,7 +191,13 @@ To get started:
190
  - Pull the Docker image
191
 
192
  ```
193
- docker pull tiacc-test.tencentcloudcr.com/tiacc/sglang:0.4.7
 
 
 
 
 
 
194
  ```
195
 
196
  - Start the API server:
@@ -200,8 +207,8 @@ docker run --gpus all \
200
  --shm-size 32g \
201
  -p 30000:30000 \
202
  --ipc=host \
203
- tiacc-test.tencentcloudcr.com/tiacc/sglang:0.4.7 \
204
- -m sglang.launch_server --model-path hunyuan/huanyuan_A13B --tp 4 --trust-remote-code --host 0.0.0.0 --port 30000
205
  ```
206
 
207
 
 
153
 
154
  - Download Model file:
155
 - Huggingface: will be downloaded automatically by vLLM.
156
+ - ModelScope: `modelscope download --model Tencent-Hunyuan/Hunyuan-A13B-Instruct-FP8`
157
 
158
 
159
  - Start the API server:
 
165
  --gpus=all -it --entrypoint python hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-vllm
166
  \
167
  -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 \
168
+ --tensor-parallel-size 2 --dtype bfloat16 --kv-cache-dtype fp8 --model tencent/Hunyuan-A13B-Instruct-FP8 --trust-remote-code
169
 
170
  ```
171
 
 
174
  docker run --privileged --user root --net=host --ipc=host \
175
  -v ~/.cache/modelscope:/root/.cache/modelscope \
176
  --gpus=all -it --entrypoint python hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-vllm \
177
+ -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 \
178
+ --tensor-parallel-size 2 --dtype bfloat16 --kv-cache-dtype fp8 \
179
+ --model /root/.cache/modelscope/hub/models/Tencent-Hunyuan/Hunyuan-A13B-Instruct-FP8 --trust_remote_code
180
  ```
181
 
182
 
 
191
  - Pull the Docker image
192
 
193
  ```
194
+
195
+ # china mirror
196
+ docker pull docker.cnb.cool/tencent/hunyuan/hunyuan-a13b:hunyuan-moe-A13B-sglang
197
+
198
+ # docker hub:
199
+ docker pull hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-sglang
200
+
201
  ```
202
 
203
  - Start the API server:
 
207
  --shm-size 32g \
208
  -p 30000:30000 \
209
  --ipc=host \
210
+ hunyuaninfer/hunyuan-a13b:hunyuan-moe-A13B-sglang \
211
+ -m sglang.launch_server --model-path hunyuan/Hunyuan-A13B-Instruct-FP8 --tp 2 --trust-remote-code --host 0.0.0.0 --port 30000
212
  ```
213
 
214