Added automatic resuming of sessions in Chat mode (resends the context to the API).
- app.py +1 -1
- chat.py +71 -35
- chat_client.py +13 -7
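
The heart of the change is the new generate() wrapper in chat.py: when the websocket session to the Petals API breaks mid-stream, the text produced so far is folded back into the context and the request is retried on a fresh session, which the app now caches in Gradio state instead of reopening on every call. A minimal sketch of that pattern (simplified and renamed for illustration; not the app's exact code, though chat_client.ModelClient and CHAT_URL are the ones the app uses):

    # Sketch of the resume-on-broken-session pattern introduced by this commit.
    # generate_with_resume/_generate are illustrative names.
    import chat_client

    CHAT_URL = 'ws://chat.petals.ml/api/v2/generate'

    def generate_with_resume(state, prompt, model, context, output, **kwargs):
        try:
            yield from _generate(state, prompt, model, context, output, **kwargs)
        except BrokenPipeError:
            # Session died mid-stream: resend everything generated so far as context.
            print("Retrying session...")
            yield from generate_with_resume(state, prompt, model,
                                            context=output, output='', **kwargs)

    def _generate(state, prompt, model, context, output, max_length=1024, **kwargs):
        # Reuse the cached client unless the model changed or the session is gone.
        if state['model'] != model or state['client'] is None or not state['client'].is_session():
            state['client'] = chat_client.ModelClient(CHAT_URL)
            state['client'].open_session(f"bigscience/{model}-petals", max_length)
            state['model'] = model
        for token in state['client'].generate(context + prompt, max_new_tokens=1, **kwargs):
            output += token
            yield output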
app.py
CHANGED
@@ -18,4 +18,4 @@ with gr.Blocks() as iface:
 
     # Queues are required to enable generators
     iface.queue(concurrency_count=5, max_size=50)
-    iface.launch()
+    iface.launch(show_error=True)
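
app.py only gains show_error=True, so server-side exceptions are surfaced in the browser instead of failing silently; the queue() call stays because Gradio needs the queue for generator (streaming) event handlers. A small, hypothetical standalone demo of the same setup (not part of this repo):

    # Hypothetical demo showing why the queue is needed: the handler is a
    # generator, and each yield streams a partial reply to the output widget.
    import time
    import gradio as gr

    def stream_reply(prompt):
        text = ""
        for word in ("thinking", "about", prompt):
            text += word + " "
            time.sleep(0.1)
            yield text                             # each yield updates the output textbox

    with gr.Blocks() as demo:
        inp = gr.Textbox(label="Prompt")
        out = gr.Textbox(label="Reply")
        inp.submit(stream_reply, inputs=inp, outputs=out)

    demo.queue(concurrency_count=5, max_size=50)   # required for generator handlers
    demo.launch(show_error=True)                   # show tracebacks in the browser UI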
chat.py
CHANGED
@@ -10,37 +10,54 @@ CHAT_URL='ws://chat.petals.ml/api/v2/generate'
 
 EMPTY_STATE = {
     'generate': False,
+    'model': None,
+    'client': None,
     'history': [],
 }
 
-def generate(state, *args):
+def generate(state, prompt, model, context, output, *args):
     # Save that we're in generating loop
     state['generate'] = True
 
     try:
-        for x in _generate(state, *args):
+        for x in _generate(state, prompt, model, context, output, *args):
             yield x
+    except BrokenPipeError:
+        # Broken session, try to renew
+        # TODO This is a bit fragile because of recursive call...
+        print("Retrying session...")
+        context = output
+        output = ''
+        yield from generate(state, prompt, model, context, output, *args)
     finally:
         state['generate'] = False
 
-def _generate(state, prompt, model, endseq, max_length,
-        do_sample, top_k, top_p, temperature,
-        context):
+def _generate(state, prompt, model, context, output, endseq, max_length,
+        do_sample, top_k, top_p, temperature):
 
+    print('prompt', prompt)
     eos = "</s>\n" if "bloomz" in model else "\n\n"
 
-    client …
+    if state['model'] != model or \
+            state['client'] == None or state['client'].is_session() == False:
+
+        try:
+            state['client'] = chat_client.ModelClient(CHAT_URL)
+            state['client'].open_session(f"bigscience/{model}-petals", max_length)
+            state['model'] = model
+        except Exception:
+            print(traceback.format_exc())
+            yield state, state['history'], prompt, output, \
+                gr.update(visible=True, value=traceback.format_exc())
+            return
+    else:
+        context = ''
+
+    client = state['client']
 
     context += eos
-    for question, answer in state['history']:
-        context += f"Human: {question}{eos}AI: {answer}{eos}"
+    #for question, answer in state['history']:
+    #    context += f"Human: {question}{eos}AI: {answer}{eos}"
 
     # Fix eventual eos token mismatch and add eos token to context and prompt
     if "bloomz" in model:
@@ -50,7 +67,7 @@ def _generate(state, prompt, model, endseq, max_length,
         context = context.replace("</s>", eos)
         prompt2 = prompt.replace("</s>", eos) + "\n\n"
 
-    prompt2 = f"{context}Human: {prompt2}AI: …
+    prompt2 = f"{context}Human: {prompt2}AI:"
 
     # Translate checkbox items to actual sequences
     seq = []
@@ -79,12 +96,13 @@ def _generate(state, prompt, model, endseq, max_length,
     if temperature == 0:
         temperature = 1.0
 
+    output += prompt2
+
     # Update widgets even before we get the first response
-    yield state, state['history'] + [[prompt, '']], None, …
+    yield state, state['history'] + [[prompt, '']], None, output, gr.update(visible=False)
 
-    output = ''
-    output_raw = ''
     orig_history = state['history']
+    new_line = ''
     try:
         for out in client.generate(prompt2,
                                    max_new_tokens=1,
@@ -97,36 +115,53 @@ def _generate(state, prompt, model, endseq, max_length,
 
             if not state['generate']:
                 client.close_session()
-                yield state, [], None, ''
+                yield state, [], None, '', ''
+                # Stopping generation
                 return
 
-            output += out
+            new_line += out
 
             # Detect end sequences and finish the generation
            # prematurely if found.
             for s in seq:
-                spl = …
+                spl = new_line.split(s)
+                new_line = spl[0]
                 if len(spl) > 1:
-                    state['history'] = orig_history + [[prompt, …
+                    state['history'] = orig_history + [[prompt, new_line]]
+                    output += new_line
+                    yield state, state['history'], None, output, ''
+                    # Stopping generation
                     return
 
             # Keep original history untouched as we're adding just
             # a chunks at one moment.
-            state['history'] = orig_history + [[prompt, …
+            state['history'] = orig_history + [[prompt, new_line]]
+            yield state, state['history'], None, output, ''
+
+    except BrokenPipeError:
+        # Session was interrupted
+        # Handled in upstream func
+        client.close_session()
+        state['client'] = None
+        state['model'] = None
+
+        print("Broken session!")
+        raise
     except Exception:
+        client.close_session()
+        state['client'] = None
+        state['model'] = None
+
         print(traceback.format_exc())
+        # TODO Store errors outside output log
+        yield state, state['history'], prompt, output, \
+            gr.update(visible=True, value=traceback.format_exc())
         return
 
 def reset(state):
     """Resets the session and clears the chat window."""
     state.update(EMPTY_STATE)
-    return state, [], ''
+    return state, [], '', gr.update(visible=False, value='')
 
 with gr.Blocks() as iface_chat:
     gr.Markdown("""**Let's talk to Bloom in a chat!**""")
@@ -163,6 +198,7 @@ with gr.Blocks() as iface_chat:
     chat = gr.Chatbot(label='Chat window')
     prompt = gr.Textbox(show_label=False, label='Prompt',
                         placeholder="Prompt Here and press Enter...").style(container=False)
+    error = gr.Textbox(label="Error log", visible=False, elem_id="error")
 
     with gr.Row():
         button_generate = gr.Button("Generate")
@@ -174,13 +210,13 @@ with gr.Blocks() as iface_chat:
     # Chat history
     state = gr.State(EMPTY_STATE)
 
-    inputs = [state, prompt, model, …
-    outputs=[state, chat, prompt, output]
+    inputs = [state, prompt, model, context, output, endseq,
+              max_length, do_sample, top_k, top_p, temperature]
+    outputs=[state, chat, prompt, output, error]
 
     prompt.submit(generate, inputs=inputs, outputs=outputs)
     button_generate.click(generate, inputs=inputs, outputs=outputs)
-    button_reset.click(reset, inputs=[state], outputs=[state, chat, output])
+    button_reset.click(reset, inputs=[state], outputs=[state, chat, output, error])
 
     examples = gr.Examples(inputs=[context, prompt, model, do_sample, top_k, top_p, temperature],
                            examples=[
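
Besides the session handling, chat.py adds a hidden "Error log" textbox that the handlers reveal with gr.update(visible=True, value=...) whenever an exception is caught, and hide again on reset. A stripped-down sketch of that pattern (do_work and the widget names here are illustrative, not the app's code):

    # Sketch of the hidden error-log pattern used above; do_work is a hypothetical helper.
    import traceback
    import gradio as gr

    def do_work(prompt):
        if not prompt:
            raise ValueError("empty prompt")
        return prompt.upper()

    def run(prompt):
        try:
            return do_work(prompt), gr.update(visible=False, value='')
        except Exception:
            # Reveal the error box and show the traceback instead of failing silently.
            return '', gr.update(visible=True, value=traceback.format_exc())

    with gr.Blocks() as demo:
        prompt = gr.Textbox(label="Prompt")
        output = gr.Textbox(label="Output")
        error = gr.Textbox(label="Error log", visible=False, elem_id="error")
        prompt.submit(run, inputs=[prompt], outputs=[output, error])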
chat_client.py
CHANGED
@@ -22,29 +22,35 @@ class ModelClient(object):
         self.ws.send(json.dumps(payload))
         assert json.loads(self.ws.recv())['ok'] == True
 
+    def is_session(self):
+        return self.ws != None
+
     def close_session(self):
         if self.ws:
             self.ws.close()
+            self.ws = None
 
     def generate(self, prompt, **kwargs):
+        try:
+            return self._generate(prompt, **kwargs)
+        except:
+            self.close_session()
+            raise
+
+    def _generate(self, prompt, **kwargs):
         payload = {
             "type": "generate",
             "inputs": prompt,
             "max_new_tokens": 1,
             "do_sample": 0,
-            "temperature": …
+            "temperature": 1,
             "stop_sequence": "</s>" if "bloomz" in self.model else "\n\n",
         }
         payload = {**payload, **kwargs}
         self.ws.send(json.dumps(payload))
 
         while True:
-            try:
-                data = json.loads(self.ws.recv())
-            except json.decoder.JSONDecodeError:
-                self.close_session()
-                raise
-
+            data = json.loads(self.ws.recv())
             if not data['ok']:
                 raise Exception(data['traceback'])
             yield data['outputs']
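
Taken together, the chat_client.py changes make the client safe to reuse: close_session() now clears self.ws, is_session() lets callers check whether a session is still open, and generate() closes the session on any error before re-raising. A hedged usage sketch based only on the methods visible in this diff (the model name, max length, and stop handling are illustrative values, not defaults):

    # Usage sketch for ModelClient; argument values here are examples only.
    from chat_client import ModelClient

    client = ModelClient('ws://chat.petals.ml/api/v2/generate')
    client.open_session("bigscience/bloom-petals", 512)   # model name, max length

    reply = ""
    try:
        for token in client.generate("Human: Hi!\n\nAI:", max_new_tokens=1):
            reply += token
            if "\n\n" in reply:                  # stop at the end-of-turn sequence
                reply = reply.split("\n\n")[0]
                break
    finally:
        # generate() already closes the session if it raises; close_session() is
        # safe to call again now that it resets self.ws to None.
        if client.is_session():
            client.close_session()
    print(reply)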