Update app.py
app.py CHANGED
@@ -80,13 +80,14 @@ def respond(
     ins_f = generate_prompt(message,None)
     inputs = tokenizer(ins_f, return_tensors="pt")
     input_ids = inputs["input_ids"].cuda()
+    max_new_tokens = 512
     generation_config = GenerationConfig(
         temperature=0.1,
         top_p=0.75,
         top_k=40,
         do_sample=True,
         num_beams=1,
-        max_new_tokens =
+        max_new_tokens = max_new_tokens
     )

     # Without streaming
@@ -148,4 +149,4 @@ demo = gr.ChatInterface(
 )
 if __name__ == "__main__":
     model.eval()
-    demo.launch()
+    demo.launch(share=True)
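The commit defines `max_new_tokens` once and passes it into `GenerationConfig`, completing the previously dangling `max_new_tokens =` argument, and switches `demo.launch()` to `share=True` so Gradio also serves a temporary public link alongside the local server. Below is a minimal sketch of how the updated `respond()` path might fit together, assuming the standard transformers `GenerationConfig`/`generate` API; `generate_prompt`, `tokenizer`, and `model` are defined elsewhere in app.py, the full `respond()` signature is truncated in the hunk header, and everything under `# Without streaming` is an assumed implementation, not code shown in this diff.

```python
# Sketch of the updated generation path (assumes transformers + a CUDA model).
# generate_prompt, tokenizer, and model are defined elsewhere in app.py;
# the code below "# Without streaming" is an assumption for illustration.
import torch
from transformers import GenerationConfig

def respond(message, history):  # full signature truncated in the hunk header
    ins_f = generate_prompt(message, None)   # build the instruction prompt
    inputs = tokenizer(ins_f, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()   # move token ids to the GPU

    max_new_tokens = 512                     # generation cap added by this commit
    generation_config = GenerationConfig(
        temperature=0.1,                     # near-greedy sampling
        top_p=0.75,
        top_k=40,
        do_sample=True,
        num_beams=1,                         # no beam search
        max_new_tokens=max_new_tokens,       # previously left incomplete
    )

    # Without streaming: a single blocking generate() call (assumed)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
```

On the second hunk, `demo.launch(share=True)` asks Gradio to create a temporary public URL in addition to the local one, which is the stock way to make the chat demo reachable outside localhost.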