Update README.md
Browse files
README.md
CHANGED
@@ -216,7 +216,7 @@ VLLM_USE_PRECOMPILED=1 pip install --editable .
|
|
216 |
**2. Latency benchmarking**
|
217 |
```Shell
|
218 |
export MODEL=Qwen/Qwen3-32B # or pytorch/Qwen3-32B-float8dq
|
219 |
-
VLLM_DISABLE_COMPILE_CACHE=1 python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model MODEL --batch-size 1
|
220 |
```
|
221 |
</details>
|
222 |
|
|
|
216 |
**2. Latency benchmarking**
|
217 |
```Shell
|
218 |
export MODEL=Qwen/Qwen3-32B # or pytorch/Qwen3-32B-float8dq
|
219 |
+
VLLM_DISABLE_COMPILE_CACHE=1 python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model "$MODEL" --batch-size 1
|
220 |
```
|
221 |
</details>
|
222 |
|