Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py (#2)
Browse files — Upload app.py (commit 2011daf1337c19c09423a3b163cae1ba24db559c)
Co-authored-by: Longguang Zhong <[email protected]>
app.py
CHANGED
@@ -48,7 +48,7 @@ def generate(
|
|
48 |
conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
|
49 |
conversation.append({"role": "user", "content": message})
|
50 |
|
51 |
-
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
|
52 |
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
|
53 |
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
|
54 |
gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
|
@@ -130,4 +130,4 @@ with gr.Blocks(css="style.css") as demo:
|
|
130 |
chat_interface.render()
|
131 |
|
132 |
if __name__ == "__main__":
|
133 |
-
demo.queue(max_size=20).launch()
|
|
|
48 |
conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
|
49 |
conversation.append({"role": "user", "content": message})
|
50 |
|
51 |
+
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt", add_generation_prompt=True)
|
52 |
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
|
53 |
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
|
54 |
gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
|
|
|
130 |
chat_interface.render()
|
131 |
|
132 |
if __name__ == "__main__":
|
133 |
+
demo.queue(max_size=20).launch()
|