Update README.md
Browse files
README.md
CHANGED
@@ -305,7 +305,7 @@ python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model
|
|
305 |
|
306 |
### float8dq
|
307 |
```shell
|
308 |
-
python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model pytorch/Phi-4-mini-instruct-float8dq --batch-size 1
|
309 |
```
|
310 |
|
311 |
## benchmark_serving
|
@@ -333,7 +333,7 @@ vllm serve pytorch/Phi-4-mini-instruct-float8dq --tokenizer microsoft/Phi-4-mini
|
|
333 |
|
334 |
Client:
|
335 |
```shell
|
336 |
-
python benchmarks/benchmark_serving.py --backend vllm --dataset-name sharegpt --tokenizer microsoft/Phi-4-mini-instruct --dataset-path ./ShareGPT_V3_unfiltered_cleaned_split.json --model jerryzh168/phi4-mini-float8dq --num-prompts 1
|
337 |
```
|
338 |
|
339 |
# Disclaimer
|
|
|
305 |
|
306 |
### float8dq
|
307 |
```shell
|
308 |
+
VLLM_DISABLE_COMPILE_CACHE=1 python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model pytorch/Phi-4-mini-instruct-float8dq --batch-size 1
|
309 |
```
|
310 |
|
311 |
## benchmark_serving
|
|
|
333 |
|
334 |
Client:
|
335 |
```shell
|
336 |
+
VLLM_DISABLE_COMPILE_CACHE=1 python benchmarks/benchmark_serving.py --backend vllm --dataset-name sharegpt --tokenizer microsoft/Phi-4-mini-instruct --dataset-path ./ShareGPT_V3_unfiltered_cleaned_split.json --model jerryzh168/phi4-mini-float8dq --num-prompts 1
|
337 |
```
|
338 |
|
339 |
# Disclaimer
|