Calculates and prints generation speed.
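Both files gain the same nested `stats()` helper, which picks whichever unit keeps the number readable: tokens per second while generation runs faster than one token per second, seconds per token otherwise. A minimal standalone sketch of that logic (the `speed_stats` name and the explicit `elapsed` guard are additions for illustration; in the diff it is a closure over `start` and `cnt`):

import time

def speed_stats(start, cnt):
    # No tokens yet: placeholder, mirrors stats() in the diff below
    if cnt == 0:
        return "\u2026 | ? sec/t"
    # Guard against a zero interval; the diff's closure leaves this unguarded
    elapsed = max(time.time() - start, 1e-9)
    if cnt > elapsed:  # faster than one token per second
        return f" | {cnt / elapsed:.1f} t/sec"
    return f" | {elapsed / cnt:.1f} sec/t"  # slower than one token per second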
chat.py
CHANGED
@@ -4,6 +4,7 @@
 import traceback
 import gradio as gr
 import chat_client
+import time
 import json
 import re
 
@@ -36,6 +37,19 @@ def generate(state, prompt, model, context, output, *args):
 def _generate(state, prompt, model, context, output, endseq, max_length,
               do_sample, top_k, top_p, temperature):
 
+    start = time.time()
+    cnt = 0  # Tokens generated
+
+    def stats():
+        # Produces inline stats for generation speed
+        if cnt == 0:
+            return "\u2026 | ? sec/t"
+        if cnt > time.time() - start:
+            items_per_sec = cnt / (time.time() - start)
+            return f" | {items_per_sec:.1f} t/sec"
+        sec_per_item = (time.time() - start) / cnt
+        return f" | {sec_per_item:.1f} sec/t"
+
     print('prompt', prompt)
     eos = "</s>\n" if "bloomz" in model else "\n\n"
 
@@ -54,7 +68,7 @@ def _generate(state, prompt, model, context, output, endseq, max_length,
             state['model'] = model
         except Exception:
             print(traceback.format_exc())
-            raise gr.Error(traceback.format_exc())
+            raise gr.Error(traceback.format_exc(limit=3))
 
     else:
         context = ''
@@ -106,7 +120,7 @@ def _generate(state, prompt, model, context, output, endseq, max_length,
     output += prompt2
 
     # Update widgets even before we get the first response
-    yield state, state['history'] + [[prompt, …
+    yield state, state['history'] + [[prompt, stats()]], None, output
 
     orig_history = state['history']
     new_line = ''
@@ -126,6 +140,7 @@ def _generate(state, prompt, model, context, output, endseq, max_length,
                 # Stopping generation
                 return
 
+            cnt += 1
            new_line += out
 
             # Detect end sequences and finish the generation
@@ -142,9 +157,12 @@ def _generate(state, prompt, model, context, output, endseq, max_length,
 
             # Keep the original history untouched as we're adding just
             # a chunk at a time.
-            state['history'] = orig_history + [[prompt, new_line]]
+            state['history'] = orig_history + [[prompt, new_line + stats()]]
             yield state, state['history'], None, output
 
+        # Final line w/o statistics
+        yield state, state['history'], None, output
+
     except (json.decoder.JSONDecodeError, BrokenPipeError):
         # Session was interrupted
         # Handled in upstream func
@@ -160,7 +178,7 @@ def _generate(state, prompt, model, context, output, endseq, max_length,
         state['model'] = None
 
         print(traceback.format_exc())
-        raise gr.Error(traceback.format_exc())
+        raise gr.Error(traceback.format_exc(limit=3))
 
 def reset(state):
     """Resets the session and clears the chat window."""
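In both handlers the speed readout rides along with the streamed text: every intermediate yield appends `stats()` to the chunk shown in the UI, and once the generator is exhausted one extra yield repeats the text without the statistics so the final message stays clean. A sketch of that pattern in isolation (`fake_chunks` stands in for `client.generate(...)`; the real handlers also thread Gradio state and chat history through each yield):

import time

def stream_with_stats(fake_chunks):
    start = time.time()
    cnt = 0
    text = ''
    for out in fake_chunks:  # stand-in for client.generate(...)
        cnt += 1
        text += out
        elapsed = max(time.time() - start, 1e-9)
        yield text + f" | {cnt / elapsed:.1f} t/sec"  # live stats while streaming
    yield text  # final update w/o statistics

for update in stream_with_stats(["Hello", ",", " world"]):
    print(update)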
prompt.py
CHANGED
@@ -4,6 +4,7 @@
 import traceback
 import gradio as gr
 import chat_client
+import time
 
 CHAT_URL='ws://chat.petals.ml/api/v2/generate'
 #CHAT_URL='ws://localhost:8000/api/v2/generate'
@@ -22,13 +23,25 @@ def _generate(state, prompt, model, endseq, max_length,
               do_sample, top_k, top_p, temperature,
               add_stoptoken, copy_output):
 
+    start = time.time()
+    cnt = 0
+
+    def stats():
+        # Produces inline stats for generation speed
+        if cnt == 0:
+            return "\u2026 | ? sec/t"
+        if cnt > time.time() - start:
+            items_per_sec = cnt / (time.time() - start)
+            return f" | {items_per_sec:.1f} t/sec"
+        sec_per_item = (time.time() - start) / cnt
+        return f" | {sec_per_item:.1f} sec/t"
+
     try:
         client = chat_client.ModelClient(CHAT_URL)
         client.open_session(f"bigscience/{model}-petals", max_length)
     except Exception:
         print(traceback.format_exc())
-        …
-        return
+        raise gr.Error(traceback.format_exc(limit=3))
 
     if add_stoptoken:
         prompt += "</s>" if "bloomz" in model else "\n\n"
@@ -61,7 +74,7 @@ def _generate(state, prompt, model, endseq, max_length,
 
     # This renders the prompt dialog immediately and
     # doesn't wait for the generator to return the first result
-    yield [state, prompt2, …
+    yield [state, prompt2, stats()]
 
     try:
         for out in client.generate(prompt,
@@ -77,15 +90,19 @@ def _generate(state, prompt, model, endseq, max_length,
                 client.close_session()
                 return
 
+            cnt += 1
             output += out
+
            if copy_output:
                 prompt2 += out
 
-            yield state, prompt2, output
+            yield state, prompt2, output + stats()
+
+        # Prints final result w/o statistics
+        yield state, prompt2, output
     except Exception:
         print(traceback.format_exc())
-        …
-        return
+        raise gr.Error(traceback.format_exc(limit=3))
 
 def stop(state):
     """Stops generating."""
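The other change in this commit swaps silent `return`s and full-length tracebacks for `raise gr.Error(traceback.format_exc(limit=3))`, so failures surface in the Gradio UI instead of quietly ending the generator; `limit=3` caps the formatted traceback at three stack frames, keeping the popup short. A small sketch of the pattern (`open_session_somehow` is a hypothetical stand-in for the session setup that can fail):

import traceback
import gradio as gr

def open_session_somehow():
    raise ConnectionError("websocket handshake failed")  # hypothetical failure

def handler():
    try:
        open_session_somehow()
    except Exception:
        print(traceback.format_exc())  # full trace goes to the logs
        # Short trace to the user: raising gr.Error shows a popup in the UI
        raise gr.Error(traceback.format_exc(limit=3))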