Spaces:
Paused
Paused
Commit
·
c38f54f
1
Parent(s):
ca98966
Update app_v1.py
Browse files
app_v1.py
CHANGED
@@ -6,10 +6,10 @@ import os
|
|
6 |
import torch
|
7 |
|
8 |
# Clear up some memory
|
9 |
-
torch.cuda.empty_cache()
|
10 |
|
11 |
# Try reducing the number of threads PyTorch uses
|
12 |
-
torch.set_num_threads(1)
|
13 |
|
14 |
cwd = os.getcwd()
|
15 |
cachedir = cwd + '/cache'
|
@@ -53,20 +53,22 @@ model = AutoGPTQForCausalLM.from_quantized(
|
|
53 |
quantize_config=quantize_config
|
54 |
)
|
55 |
|
56 |
-
|
57 |
|
58 |
-
|
|
|
|
|
59 |
|
60 |
# Generate output when the "Generate" button is pressed
|
61 |
-
if st.button("Generate the prompt"):
|
62 |
-
inputs = tokenizer(prompt_template, return_tensors="pt")
|
63 |
-
outputs = model.generate(
|
64 |
-
input_ids=inputs.input_ids.to("cuda:0"),
|
65 |
-
attention_mask=inputs.attention_mask.to("cuda:0"),
|
66 |
-
max_length=512 + inputs.input_ids.size(-1),
|
67 |
-
temperature=0.1,
|
68 |
-
top_p=0.95,
|
69 |
-
repetition_penalty=1.15
|
70 |
-
|
71 |
-
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
72 |
-
st.text_area("Prompt", value=generated_text)
|
|
|
6 |
import torch
|
7 |
|
8 |
# Clear up some memory
|
9 |
+
# torch.cuda.empty_cache()
|
10 |
|
11 |
# Try reducing the number of threads PyTorch uses
|
12 |
+
# torch.set_num_threads(1)
|
13 |
|
14 |
cwd = os.getcwd()
|
15 |
cachedir = cwd + '/cache'
|
|
|
53 |
quantize_config=quantize_config
|
54 |
)
|
55 |
|
56 |
+
st.write(model.hf_device_map)
|
57 |
|
58 |
+
#user_input = st.text_input("Input a phrase")
|
59 |
+
|
60 |
+
#prompt_template = f'USER: {user_input}\nASSISTANT:'
|
61 |
|
62 |
# Generate output when the "Generate" button is pressed
|
63 |
+
#if st.button("Generate the prompt"):
|
64 |
+
# inputs = tokenizer(prompt_template, return_tensors="pt")
|
65 |
+
# outputs = model.generate(
|
66 |
+
# input_ids=inputs.input_ids.to("cuda:0"),
|
67 |
+
# attention_mask=inputs.attention_mask.to("cuda:0"),
|
68 |
+
# max_length=512 + inputs.input_ids.size(-1),
|
69 |
+
# temperature=0.1,
|
70 |
+
# top_p=0.95,
|
71 |
+
# repetition_penalty=1.15
|
72 |
+
# )
|
73 |
+
# generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
74 |
+
# st.text_area("Prompt", value=generated_text)
|