Update app.py
app.py CHANGED

@@ -84,7 +84,7 @@ def generator(input_ids, generation_config, max_new_tokens):
     )
     return generation_output
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=120)
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -105,7 +105,15 @@ def respond(
         num_beams=1,
         max_new_tokens = max_new_tokens
     )
-    generation_output = generator(input_ids, generation_config, max_new_tokens)
+    #generation_output = generator(input_ids, generation_config, max_new_tokens)
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            output_scores=False,
+            max_new_tokens=max_new_tokens,
+        )
     s = generation_output.sequences[0]
     output = tokenizer.decode(s)
     response = output.split("Response:")[1].strip()
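For context, this commit replaces the wrapped `generator(...)` call with a direct `model.generate(...)` under `torch.no_grad()`, and sets the ZeroGPU allocation to 120 seconds via `@spaces.GPU(duration=120)`. Below is a minimal sketch of the new generation path, assuming `model` and `tokenizer` are loaded earlier in app.py and that `input_ids` is the already-tokenized prompt; the helper name `generate_response` and the default `max_new_tokens` value are illustrative, not part of the commit.

```python
# Minimal sketch of the generation path introduced by this commit.
# Assumes `model` and `tokenizer` are loaded elsewhere in app.py; the
# function name and the max_new_tokens default are illustrative only.
import torch
from transformers import GenerationConfig

def generate_response(model, tokenizer, input_ids, max_new_tokens=256):
    generation_config = GenerationConfig(
        num_beams=1,                 # greedy decoding, matching the diff context
        max_new_tokens=max_new_tokens,
    )
    # Inference only: disabling gradient tracking saves GPU memory.
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,  # return an output object with .sequences
            output_scores=False,           # skip per-step logits to keep the output small
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]     # prompt + completion token ids
    output = tokenizer.decode(s)
    # The prompt template evidently ends with "Response:", so keep only the completion.
    return output.split("Response:")[1].strip()
```

With `return_dict_in_generate=True`, `generate` returns a structured output whose `.sequences` field holds the token ids, which is what the decode-and-split step downstream relies on.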