Commit
·
004f3a4
1
Parent(s):
3aaff40
update readme
Browse files
README.md
CHANGED
@@ -71,13 +71,9 @@ To deploy the quantized FP4 checkpoint with [TensorRT-LLM](https://github.com/NV
|
|
71 |
```
|
72 |
from tensorrt_llm import SamplingParams
|
73 |
from tensorrt_llm._torch import LLM
|
74 |
-
from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
|
75 |
-
|
76 |
|
77 |
def main():
|
78 |
|
79 |
-
pytorch_config = PyTorchConfig()
|
80 |
-
|
81 |
prompts = [
|
82 |
"Hello, my name is",
|
83 |
"The president of the United States is",
|
@@ -86,7 +82,7 @@ def main():
|
|
86 |
]
|
87 |
sampling_params = SamplingParams(max_tokens=32)
|
88 |
|
89 |
-
llm = LLM(model="nvidia/DeepSeek-R1-FP4", tensor_parallel_size=8, pytorch_backend_config=pytorch_config, enable_attention_dp=True)
|
90 |
|
91 |
outputs = llm.generate(prompts, sampling_params)
|
92 |
|
|
|
71 |
```
|
72 |
from tensorrt_llm import SamplingParams
|
73 |
from tensorrt_llm._torch import LLM
|
|
|
|
|
74 |
|
75 |
def main():
|
76 |
|
|
|
|
|
77 |
prompts = [
|
78 |
"Hello, my name is",
|
79 |
"The president of the United States is",
|
|
|
82 |
]
|
83 |
sampling_params = SamplingParams(max_tokens=32)
|
84 |
|
85 |
+
llm = LLM(model="nvidia/DeepSeek-R1-FP4", tensor_parallel_size=8, enable_attention_dp=True)
|
86 |
|
87 |
outputs = llm.generate(prompts, sampling_params)
|
88 |
|