Update README.md
Browse files
README.md
CHANGED
@@ -328,12 +328,12 @@ python benchmarks/benchmark_serving.py --backend vllm --dataset-name sharegpt --
|
|
328 |
### float8dq
|
329 |
Server:
|
330 |
```Shell
|
331 |
-
vllm serve pytorch/Phi-4-mini-instruct-float8dq --tokenizer microsoft/Phi-4-mini-instruct -O3
|
332 |
```
|
333 |
|
334 |
Client:
|
335 |
```Shell
|
336 |
-
|
337 |
```
|
338 |
|
339 |
# Disclaimer
|
|
|
328 |
### float8dq
|
329 |
Server:
|
330 |
```Shell
|
331 |
+
VLLM_DISABLE_COMPILE_CACHE=1 vllm serve pytorch/Phi-4-mini-instruct-float8dq --tokenizer microsoft/Phi-4-mini-instruct -O3
|
332 |
```
|
333 |
|
334 |
Client:
|
335 |
```Shell
|
336 |
+
python benchmarks/benchmark_serving.py --backend vllm --dataset-name sharegpt --tokenizer microsoft/Phi-4-mini-instruct --dataset-path ./ShareGPT_V3_unfiltered_cleaned_split.json --model pytorch/Phi-4-mini-instruct-float8dq --num-prompts 1
|
337 |
```
|
338 |
|
339 |
# Disclaimer
|