Adding session reset button to Chat mode.
app.py
CHANGED
@@ -17,5 +17,5 @@ with gr.Blocks() as iface:
     gr.TabbedInterface([iface_prompt, iface_chat], ["Prompt mode", "Chat mode"])

 # Queues are required to enable generators
-iface.queue(concurrency_count=5)
+iface.queue(concurrency_count=5, max_size=50)
 iface.launch()
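The comment above ("Queues are required to enable generators") is the reason this line exists at all: Gradio only streams partial results from generator callbacks when a queue is enabled, and the new max_size=50 additionally caps how many requests may wait behind the five concurrent workers. Below is a minimal sketch of the same pattern, assuming the Gradio 3.x API this Space targets (concurrency_count was removed in later Gradio releases); count_up and the widget labels are illustrative, not code from this Space:

import time
import gradio as gr

def count_up(n):
    # A generator callback: every yield pushes a partial result to the UI.
    total = 0
    for i in range(int(n)):
        total += i
        time.sleep(0.1)
        yield f"partial sum after {i + 1} steps: {total}"

with gr.Blocks() as demo:
    steps = gr.Number(value=10, label="Steps")
    result = gr.Textbox(label="Result")
    gr.Button("Run").click(count_up, inputs=steps, outputs=result)

# Generator callbacks only work with the queue enabled; max_size caps how
# many requests may wait behind the 5 concurrent workers.
demo.queue(concurrency_count=5, max_size=50)
demo.launch()

Requests that arrive once the queue already holds max_size pending items are turned away instead of piling up behind the workers.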
chat.py
CHANGED
@@ -8,22 +8,38 @@ import chat_client
 CHAT_URL='ws://chat.petals.ml/api/v2/generate'
 #CHAT_URL='ws://localhost:8000/api/v2/generate'

-def generate(prompt, model, endseq, max_length,
+EMPTY_STATE = {
+    'generate': False,
+    'history': [],
+}
+
+def generate(state, *args):
+    # Save that we're in generating loop
+    state['generate'] = True
+
+    try:
+        for x in _generate(state, *args):
+            yield x
+    finally:
+        state['generate'] = False
+
+def _generate(state, prompt, model, endseq, max_length,
              do_sample, top_k, top_p, temperature,
-             context):
+             context):

     eos = "</s>\n" if "bloomz" in model else "\n\n"

     try:
         client = chat_client.ModelClient(CHAT_URL)
         client.open_session(f"bigscience/{model}-petals", max_length)
+        state['client'] = client
     except Exception:
         print(traceback.format_exc())
-        yield state, state, prompt, "Error: " + traceback.format_exc()
+        yield state, state['history'], prompt, "Error: " + traceback.format_exc()
         return

     context += eos
-    for question, answer in state:
+    for question, answer in state['history']:
         context += f"Human: {question}{eos}AI: {answer}{eos}"

     # Fix eventual eos token mismatch and add eos token to context and prompt
@@ -64,10 +80,11 @@ def generate(prompt, model, endseq, max_length,
         temperature = 1.0

     # Update widgets even before we get the first response
-    yield state + [[prompt, '']], …
+    yield state, state['history'] + [[prompt, '']], None, prompt2

     output = ''
     output_raw = ''
+    orig_history = state['history']
     try:
         for out in client.generate(prompt2,
             max_new_tokens=1,
@@ -78,6 +95,13 @@ def generate(prompt, model, endseq, max_length,
             extra_stop_sequences=seq
         ):

+            if not state['generate']:
+                print("Stopping generation.")
+                client.close_session()
+                yield state, [], None, ''
+                return
+                #return state, state['history'], None, prompt2 + output_raw
+
             output_raw += out
             output += out

@@ -87,17 +111,25 @@ def generate(prompt, model, endseq, max_length,
                 spl = output.split(s)
                 output = spl[0]
                 if len(spl) > 1:
-                    …
-                    yield …
+                    state['history'] = orig_history + [[prompt, output]]
+                    yield state, state['history'], None, prompt2 + output_raw
                     return

-            …
-            …
+            # Keep original history untouched as we're adding just
+            # a chunks at one moment.
+            state['history'] = orig_history + [[prompt, output]]
+
+            yield state, state['history'], None, prompt2 + output_raw
     except Exception:
         print(traceback.format_exc())
-        yield state, state, prompt, "…
+        yield state, state['history'], prompt, output_raw + "\nError: " + traceback.format_exc()
         return

+def reset(state):
+    """Resets the session and clears the chat window."""
+    state.update(EMPTY_STATE)
+    return state, [], ''
+
 with gr.Blocks() as iface_chat:
     gr.Markdown("""**Let's talk to Bloom in a chat!**""")

@@ -116,7 +148,7 @@ with gr.Blocks() as iface_chat:
     # Switch between sampling and greedy generation
     do_sample = gr.Checkbox(value=True, interactive=True, label="do_sample")
     context = gr.Textbox(lines=3, label="Initial context:", interactive=True,
-        value="A human talks to a powerful AI that follows the human's instructions…
+        value="A human talks to a powerful AI that follows the human's instructions.\n"
               "AI is talkative, friendly, positive and provides detailed answers to any question.</s>\n"
               "Human: Hi!</s>\n"
               "AI: How can I help you?")
@@ -136,21 +168,22 @@ with gr.Blocks() as iface_chat:

     with gr.Row():
         button_generate = gr.Button("Generate")
-        …
+        button_reset = gr.Button("Reset/Clear session")
         # button_stop = gr.Button("Stop") # TODO, not supported by websocket API yet.

     with gr.Accordion("Raw prompt log", open=False):
         output = gr.Textbox(lines=3, show_label=False).style(container=False)

     # Chat history
-    state = gr.State(…
+    state = gr.State(EMPTY_STATE)

-    inputs = [prompt, model, endseq, max_length, do_sample,
-              top_k, top_p, temperature, context]
-    outputs=[…
+    inputs = [state, prompt, model, endseq, max_length, do_sample,
+              top_k, top_p, temperature, context]
+    outputs=[state, chat, prompt, output]

     prompt.submit(generate, inputs=inputs, outputs=outputs)
     button_generate.click(generate, inputs=inputs, outputs=outputs)
+    button_reset.click(reset, inputs=[state], outputs=[state, chat, output])

     examples = gr.Examples(inputs=[context, prompt, model, do_sample, top_k, top_p, temperature],
                            examples=[
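Taken together, the chat.py changes swap the bare history list for a per-session dict (EMPTY_STATE) that can also carry a stop flag and the open Petals client, wrap the streaming loop in generate()/_generate() so the flag is always cleared when the loop exits, and wire the new Reset/Clear session button to reset(). Below is a self-contained sketch of that wiring with the Petals client swapped out for a stand-in; _fake_generate, the canned reply text, and the demo layout are assumptions for illustration, while EMPTY_STATE, generate, and reset mirror the commit:

import time
import gradio as gr

EMPTY_STATE = {
    'generate': False,   # True only while a streaming loop is running
    'history': [],       # list of [user, assistant] pairs shown in the Chatbot
}

def _fake_generate(state, prompt):
    # Stand-in for the Petals-backed _generate(): streams a canned reply
    # word by word, checking the stop flag kept in the session state.
    answer = ''
    for word in "this reply is streamed one word at a time".split():
        if not state['generate']:   # reset() flipped the flag: stop early
            return
        answer = (answer + ' ' + word).strip()
        state['history'] = state['history'][:-1] + [[prompt, answer]]
        time.sleep(0.2)
        yield state, state['history']

def generate(state, prompt):
    # Mark the session as generating, and always clear the flag on exit.
    state['generate'] = True
    state['history'] = state['history'] + [[prompt, '']]
    try:
        yield from _fake_generate(state, prompt)
    finally:
        state['generate'] = False

def reset(state):
    # Stop any running generation and clear the chat window.
    state.update(EMPTY_STATE)
    state['history'] = []   # fresh list so sessions never share EMPTY_STATE's
    return state, []

with gr.Blocks() as demo:
    chat = gr.Chatbot()
    prompt = gr.Textbox(label="Prompt")
    state = gr.State(EMPTY_STATE)   # per-session chat state
    prompt.submit(generate, inputs=[state, prompt], outputs=[state, chat])
    gr.Button("Reset/Clear session").click(reset, inputs=[state], outputs=[state, chat])

# More than one worker so a Reset click can run while a generation is streaming.
demo.queue(concurrency_count=5)
demo.launch()

Because the running generate() call and a later reset() click receive the same state dict, flipping state['generate'] to False is enough for the streaming loop to notice and stop at its next iteration.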