zhiyucheng committed
Commit 004f3a4 · 1 Parent(s): 3aaff40

update readme

Files changed (1): README.md +1 -5
README.md CHANGED
@@ -71,13 +71,9 @@ To deploy the quantized FP4 checkpoint with [TensorRT-LLM](https://github.com/NV
 ```
 from tensorrt_llm import SamplingParams
 from tensorrt_llm._torch import LLM
-from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
-
 
 def main():
 
-    pytorch_config = PyTorchConfig()
-
     prompts = [
         "Hello, my name is",
         "The president of the United States is",
@@ -86,7 +82,7 @@ def main():
     ]
     sampling_params = SamplingParams(max_tokens=32)
 
-    llm = LLM(model="nvidia/DeepSeek-R1-FP4", tensor_parallel_size=8, pytorch_backend_config=pytorch_config, enable_attention_dp=True)
+    llm = LLM(model="nvidia/DeepSeek-R1-FP4", tensor_parallel_size=8, enable_attention_dp=True)
 
     outputs = llm.generate(prompts, sampling_params)
 
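For reference, a sketch of the complete deployment example as it reads after this change. The result-printing loop and the `__main__` guard are assumptions modeled on the common TensorRT-LLM LLM-API example pattern and are not part of this diff; the prompt list is also abridged here.

```python
from tensorrt_llm import SamplingParams
from tensorrt_llm._torch import LLM


def main():

    prompts = [
        "Hello, my name is",
        "The president of the United States is",
        # ... additional prompts elided in the diff ...
    ]
    sampling_params = SamplingParams(max_tokens=32)

    # PyTorchConfig is no longer constructed or passed; the PyTorch backend
    # is used with its default configuration.
    llm = LLM(model="nvidia/DeepSeek-R1-FP4", tensor_parallel_size=8, enable_attention_dp=True)

    outputs = llm.generate(prompts, sampling_params)

    # Assumed: print each prompt with its generation, following the usual
    # LLM-API example pattern (not shown in this diff).
    for output in outputs:
        print(f"Prompt: {output.prompt!r}, Generated: {output.outputs[0].text!r}")


if __name__ == "__main__":
    main()
```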