Update README.md
README.md (CHANGED)
@@ -174,27 +174,6 @@ output_text = tokenizer.batch_decode(
     generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
 )
 print("Response:", output_text[0][len(prompt):])
-
-# Local Benchmark
-import torch.utils.benchmark as benchmark
-from torchao.utils import benchmark_model
-import torchao
-
-def benchmark_fn(f, *args, **kwargs):
-    # Manual warmup
-    for _ in range(2):
-        f(*args, **kwargs)
-
-    t0 = benchmark.Timer(
-        stmt="f(*args, **kwargs)",
-        globals={"args": args, "kwargs": kwargs, "f": f},
-        num_threads=torch.get_num_threads(),
-    )
-    return f"{(t0.blocked_autorange().mean):.3f}"
-
-torchao.quantization.utils.recommended_inductor_config_setter()
-quantized_model = torch.compile(quantized_model, mode="max-autotune")
-print(f"{save_to} model:", benchmark_fn(quantized_model.generate, **inputs, max_new_tokens=128))
 ```
 
 # Model Quality
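For reference, the snippet removed above is built around `torch.utils.benchmark.Timer`. Below is a minimal, self-contained sketch of the same pattern (a manual warmup followed by the `blocked_autorange()` mean); the matmul workload is a hypothetical stand-in for the README's `quantized_model.generate(**inputs, max_new_tokens=128)` call, which is not defined in this hunk.

```python
import torch
import torch.utils.benchmark as benchmark


def benchmark_fn(f, *args, **kwargs):
    # Manual warmup so one-time costs (compilation, caching) are excluded from the timing.
    for _ in range(2):
        f(*args, **kwargs)

    t0 = benchmark.Timer(
        stmt="f(*args, **kwargs)",
        globals={"args": args, "kwargs": kwargs, "f": f},
        num_threads=torch.get_num_threads(),
    )
    # blocked_autorange() chooses the number of runs automatically; .mean is seconds per run.
    return f"{t0.blocked_autorange().mean:.3f}"


if __name__ == "__main__":
    # Stand-in workload; the README timed quantized_model.generate(...) instead.
    x = torch.randn(1024, 1024)
    print("matmul seconds:", benchmark_fn(torch.matmul, x, x))
```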