Update app.py
app.py CHANGED

@@ -84,7 +84,7 @@ def generator(input_ids, generation_config, max_new_tokens):
     )
     return generation_output
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=120)
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -105,7 +105,15 @@ def respond(
         num_beams=1,
         max_new_tokens = max_new_tokens
     )
-    generation_output = generator(input_ids, generation_config, max_new_tokens)
+    #generation_output = generator(input_ids, generation_config, max_new_tokens)
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            output_scores=False,
+            max_new_tokens=max_new_tokens,
+        )
     s = generation_output.sequences[0]
     output = tokenizer.decode(s)
     response = output.split("Response:")[1].strip()
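For context, this commit replaces the wrapped `generator(...)` call with a direct `model.generate(...)` under `torch.no_grad()`, and sets the ZeroGPU allocation to 120 seconds via `@spaces.GPU(duration=120)`. Below is a minimal sketch of the new generation path, assuming `model` and `tokenizer` are loaded earlier in app.py and that `input_ids` is the already-tokenized prompt; the helper name `generate_response` and the default `max_new_tokens` value are illustrative, not part of the commit.

```python
# Minimal sketch of the generation path introduced by this commit.
# Assumes `model` and `tokenizer` are loaded elsewhere in app.py; the
# function name and the max_new_tokens default are illustrative only.
import torch
from transformers import GenerationConfig

def generate_response(model, tokenizer, input_ids, max_new_tokens=256):
    generation_config = GenerationConfig(
        num_beams=1,                 # greedy decoding, matching the diff context
        max_new_tokens=max_new_tokens,
    )
    # Inference only: disabling gradient tracking saves GPU memory.
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,  # return an output object with .sequences
            output_scores=False,           # skip per-step logits to keep the output small
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]     # prompt + completion token ids
    output = tokenizer.decode(s)
    # The prompt template evidently ends with "Response:", so keep only the completion.
    return output.split("Response:")[1].strip()
```

With `return_dict_in_generate=True`, `generate` returns a structured output whose `.sequences` field holds the token ids, which is what the decode-and-split step downstream relies on.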