Update README.md
Browse files
README.md
CHANGED
@@ -14,10 +14,12 @@ pipeline_tag: text-generation
|
|
14 |
|
15 |
# 1. Inference with vLLM
|
16 |
```Shell
|
|
|
17 |
VLLM_DISABLE_COMPILE_CACHE=1 vllm serve SocialLocalMobile/Qwen3-32B-float8dq --tokenizer Qwen/Qwen3-32B -O3
|
18 |
```
|
19 |
|
20 |
```Shell
|
|
|
21 |
curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
|
22 |
"model": "SocialLocalMobile/Qwen3-32B-float8dq",
|
23 |
"messages": [
|
|
|
14 |
|
15 |
# 1. Inference with vLLM
|
16 |
```Shell
|
17 |
+
# Server: start the vLLM OpenAI-compatible API server
|
18 |
VLLM_DISABLE_COMPILE_CACHE=1 vllm serve SocialLocalMobile/Qwen3-32B-float8dq --tokenizer Qwen/Qwen3-32B -O3
|
19 |
```
|
20 |
|
21 |
```Shell
|
22 |
+
# Client: send a chat completion request to the running server
|
23 |
curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
|
24 |
"model": "SocialLocalMobile/Qwen3-32B-float8dq",
|
25 |
"messages": [
|