Spaces:
Running
Running
Allen Park
committed on
Commit
·
0fbe7c9
1
Parent(s):
8d132dc
make cpu memory usage low
Browse files
app.py
CHANGED
@@ -46,6 +46,7 @@ def model_call(question, document, answer):
|
|
46 |
do_sample=True,
|
47 |
attention_mask=attention_mask,
|
48 |
pad_token_id=tokenizer.eos_token_id,
|
|
|
49 |
)
|
50 |
with torch.no_grad():
|
51 |
outputs = model.generate(**generate_kwargs)
|
|
|
46 |
do_sample=True,
|
47 |
attention_mask=attention_mask,
|
48 |
pad_token_id=tokenizer.eos_token_id,
|
49 |
+
low_cpu_mem_usage=True
|
50 |
)
|
51 |
with torch.no_grad():
|
52 |
outputs = model.generate(**generate_kwargs)
|