Update README.md
Browse files
README.md
CHANGED
@@ -184,8 +184,7 @@ sampling_params = SamplingParams(temperature=0.3, max_tokens=256)
|
|
184 |
|
185 |
llm = LLM(
|
186 |
model=model_path,
|
187 |
-
|
188 |
-
kv_cache_dtype='fp8',
|
189 |
tensor_parallel_size=8,
|
190 |
gpu_memory_utilization=0.95,
|
191 |
enforce_eager=True,
|
|
|
184 |
|
185 |
llm = LLM(
|
186 |
model=model_path,
|
187 |
+
kv_cache_dtype='auto',
|
|
|
188 |
tensor_parallel_size=8,
|
189 |
gpu_memory_utilization=0.95,
|
190 |
enforce_eager=True,
|