Adding chat mode.
app.py
CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 
 from prompt import iface_prompt
+from chat import iface_chat
 
 with gr.Blocks() as iface:
     gr.Markdown("""# Petals playground
@@ -13,7 +14,7 @@ with gr.Blocks() as iface:
 
     BLOOMZ performs better in chat mode and understands the instructions better.""")
 
-    gr.TabbedInterface([iface_prompt, ], ["Prompt mode",])
+    gr.TabbedInterface([iface_prompt, iface_chat], ["Prompt mode", "Chat mode"])
 
     # Queues are required to enable generators
     iface.queue(concurrency_count=5)
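After this change app.py simply stacks the two pre-built Blocks into tabs. For orientation, a minimal runnable sketch of the resulting pattern (the Markdown header is abbreviated and the `__main__` launch guard is an assumption for local runs; on Spaces the app is started automatically):

import gradio as gr

from prompt import iface_prompt  # existing "Prompt mode" tab (a gr.Blocks)
from chat import iface_chat      # new "Chat mode" tab added in this commit

with gr.Blocks() as iface:
    gr.Markdown("# Petals playground")  # full header text abbreviated here

    # Each tab is an already-built Blocks; TabbedInterface lays them out side by side.
    gr.TabbedInterface([iface_prompt, iface_chat], ["Prompt mode", "Chat mode"])

    # Queues are required for the streaming (generator-based) handlers to work.
    iface.queue(concurrency_count=5)

if __name__ == "__main__":
    # Assumed local entry point; Hugging Face Spaces runs app.py automatically.
    iface.launch()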
chat.py
ADDED
@@ -0,0 +1,160 @@
#!/usr/bin/env python
# or gradio app.py

import traceback
import gradio as gr
import chat_client

CHAT_URL='ws://chat.petals.ml/api/v2/generate'
#CHAT_URL='ws://localhost:8000/api/v2/generate'

def generate(prompt, model, endseq, max_length,
             do_sample, top_k, top_p, temperature,
             context, state):

    eos = "</s>\n" if "bloomz" in model else "\n\n"

    try:
        client = chat_client.ModelClient(CHAT_URL)
        client.open_session(f"bigscience/{model}-petals", max_length)
    except Exception:
        print(traceback.format_exc())
        yield state, state, prompt, "Error: " + traceback.format_exc()
        return

    context += eos
    for question, answer in state:
        context += f"Human: {question}{eos}AI: {answer}{eos}"

    # Fix any eos token mismatch and add the eos token to context and prompt
    if "bloomz" in model:
        context = context.replace("\n\n", eos)
        prompt2 = prompt.replace("\n\n", eos) + "</s>\n"
    else:
        context = context.replace("</s>", eos)
        prompt2 = prompt.replace("</s>", eos) + "\n\n"

    prompt2 = f"{context}Human: {prompt2}AI: "

    # Translate checkbox items to actual sequences
    seq = []
    for s in endseq:
        if s == "Human:":
            seq.append("Human:")
        if s == "AI:":
            seq.append("AI:")
        if s == "\\n":
            seq.append("\n")
        elif s == "</s>":
            seq.append("</s>")
        elif s == "? (question mark)":
            seq.append("?")
        elif s == ". (dot)":
            seq.append(".")

    # Only top_k or top_p can be set
    if top_k == 0:
        top_k = None
    if top_p == 0:
        top_p = None
    if top_p and top_k:
        top_k = None

    if temperature == 0:
        temperature = 1.0

    # Update widgets even before we get the first response
    yield state + [[prompt, '']], state, None, prompt2

    output = ''
    output_raw = ''
    try:
        for out in client.generate(prompt2,
                                   max_new_tokens=1,
                                   do_sample=do_sample,
                                   temperature=temperature,
                                   top_k=top_k,
                                   top_p=top_p,
                                   extra_stop_sequences=seq
                                   ):

            output_raw += out
            output += out

            # Detect end sequences and finish the generation
            # prematurely if found.
            for s in seq:
                spl = output.split(s)
                output = spl[0]
                if len(spl) > 1:
                    state2 = state + [[prompt, output]]
                    yield state2, state2, None, prompt2 + output_raw
                    return

            state2 = state + [[prompt, output]]
            yield state2, state2, None, prompt2 + output_raw
    except Exception:
        print(traceback.format_exc())
        yield state, state, prompt, "Error: " + traceback.format_exc()
        return

with gr.Blocks() as iface_chat:
    gr.Markdown("""**Let's talk to Bloom in a chat!**""")

    with gr.Row():
        model = gr.Radio(["bloom", "bloomz", "bloom-7b1"], value='bloomz', label="Use model")

        # Additional end sequences at which generation should stop
        endseq = gr.CheckboxGroup(["Human:", "AI:", "\\n", "</s>", "? (question mark)", ". (dot)"],
                                  value=["Human:", "AI:", "\\n", "</s>"], label='Extra end sequences')

        # Maximum length of inference session
        max_length = gr.Radio([64, 128, 256, 512, 1024, 2048], value=1024, interactive=True, label="Max length")

    with gr.Row():
        with gr.Column():
            # Switch between sampling and greedy generation
            do_sample = gr.Checkbox(value=True, interactive=True, label="do_sample")
            context = gr.Textbox(lines=3, label="Initial context:", interactive=True,
                                 value="A human talks to a powerful AI that follows the human's instructions.</s>\n"
                                       "Human: Hi!</s>\n"
                                       "AI: How can I help you?")

        # Only one of top_k and top_p can be set. Requires "do_sample=True" to work.
        top_k = gr.Number(value=0, precision=0, interactive=True, label="top_k")
        top_p = gr.Number(value=0.9, precision=2, interactive=True, label="top_p")
        # TODO num_beams

        # Generation temperature
        temperature = gr.Number(value=0.75, precision=2, interactive=True, label="Temperature")

    chat = gr.Chatbot(label='Chat window')
    prompt = gr.Textbox(show_label=False,
                        placeholder="Prompt Here and press Enter...").style(container=False)

    with gr.Row():
        button_generate = gr.Button("Generate")
        # button_clear = gr.Button("Clear session") # TODO
        # button_stop = gr.Button("Stop") # TODO, not supported by websocket API yet.

    output = gr.Textbox(lines=3, label='Raw Prompt Log')

    # Chat history
    state = gr.State([])

    inputs = [prompt, model, endseq, max_length, do_sample,
              top_k, top_p, temperature, context, state]
    outputs = [chat, state, prompt, output]

    prompt.submit(generate, inputs=inputs, outputs=outputs)
    button_generate.click(generate, inputs=inputs, outputs=outputs)

    examples = gr.Examples(inputs=[context, prompt, model, do_sample, top_k, top_p, temperature],
                           examples=[
                               ["A human talks to a powerful AI that follows the human's instructions.</s>\n"
                                "Human: Hi!</s>\n"
                                "AI: Hi! How can I help you?",
                                "Could you remind me please what's the capital of Portugal?",
                                "bloomz", True, 0, 0.9, 0.75]
                           ])
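chat.py imports a chat_client module that is not part of this commit, so the websocket details are not visible here. Purely as a reading aid, a hypothetical sketch of the interface that generate() above relies on — the message format, field names, and use of the websockets package are assumptions inferred from the calls in chat.py, not the actual chat.petals.ml client:

import json
from websockets.sync.client import connect  # assumes the `websockets` package (>= 12)

class ModelClient:
    """Hypothetical stand-in for chat_client.ModelClient, for illustration only."""

    def __init__(self, url):
        self.url = url
        self.ws = None

    def open_session(self, model_name, max_length):
        # Open the websocket and announce the model and maximum session length.
        self.ws = connect(self.url)
        self.ws.send(json.dumps({"type": "open_inference_session",
                                 "model": model_name,
                                 "max_length": max_length}))
        assert json.loads(self.ws.recv()).get("ok"), "server refused the session"

    def generate(self, prompt, **params):
        # Simplified streaming loop: send one request, then yield text chunks
        # until the server signals that generation has stopped. The real client
        # presumably keeps stepping the open session token by token.
        self.ws.send(json.dumps({"type": "generate", "inputs": prompt, **params}))
        while True:
            reply = json.loads(self.ws.recv())
            yield reply.get("outputs", "")
            if reply.get("stop", True):
                break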
prompt.py
CHANGED
@@ -43,6 +43,9 @@ def generate(prompt, model, endseq, max_length,
     if top_p and top_k:
         top_k = None
 
+    if not temperature:
+        temperature = 1.0
+
     prompt2 = prompt
     output = ''
 
@@ -110,8 +113,10 @@ with gr.Blocks() as iface_prompt:
 
     output = gr.Textbox(lines=3, label='Output')
 
-
-
+    inputs = [prompt, model, endseq, max_length, do_sample,
+              top_k, top_p, temperature, add_stoptoken, copy_output]
+    outputs = [prompt, output]
+    button_generate.click(generate, inputs=inputs, outputs=outputs)
 
     examples = gr.Examples(inputs=[prompt, model, do_sample, top_k, top_p, temperature, add_stoptoken],
                            examples=[
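Both generate() functions now guard the sampling knobs before calling the backend: a 0 entered in the top_k or top_p number boxes means "unset", only one of the two is forwarded, and a temperature of 0 falls back to 1.0 (logits are divided by temperature during sampling, so 0 would be invalid). A small illustrative sketch of that normalization, pulled out into a helper that does not exist in the repo:

def normalize_sampling_params(top_k, top_p, temperature):
    # Illustrative helper mirroring the inline checks in prompt.py and chat.py.
    if top_k == 0:
        top_k = None               # 0 in the top_k box means "not set"
    if top_p == 0:
        top_p = None               # 0 in the top_p box means "not set"
    if top_p and top_k:
        top_k = None               # only one of the two may be passed on
    if not temperature:
        temperature = 1.0          # temperature=0 would be invalid for sampling
    return top_k, top_p, temperature

# With the UI defaults (top_k=0, top_p=0.9, temperature=0.75):
print(normalize_sampling_params(0, 0.9, 0.75))   # -> (None, 0.9, 0.75)
print(normalize_sampling_params(40, 0, 0))       # -> (40, None, 1.0)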