#!/usr/bin/env python
# Run with: python app.py  (or: gradio app.py)
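"""Gradio playground for BLOOM and BLOOMZ 176B inference over the Petals
network, streamed through the chat.petals.ml websocket API."""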

import traceback
import gradio as gr
import chat_client

CHAT_URL='ws://chat.petals.ml/api/v2/generate'
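# Uncomment to target a locally hosted backend instead: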
#CHAT_URL='ws://localhost:8000/api/v2/generate'

def generate(prompt, model, endseq, max_length,
        do_sample, top_k, top_p, temperature,
        add_stoptoken, copy_output):

    try:
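        # Connect to the chat endpoint and open an inference session
        # for the selected bigscience model with the requested max_length.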
        client = chat_client.ModelClient(CHAT_URL)
        client.open_session(f"bigscience/{model}-petals", max_length)
    except Exception:
        print(traceback.format_exc())
        yield [prompt, "Error: " + traceback.format_exc()]
        return

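    # Optionally close the prompt with an end-of-sequence marker:
    # "</s>" for BLOOMZ, a blank line for plain BLOOM.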
    if add_stoptoken:
        prompt += "</s>" if "bloomz" in model else "\n\n"

    # Translate checkbox items to actual sequences
    seq = []
    for s in endseq:
        if s == "\\n":
            seq.append("\n")
        elif s == "</s>":
            seq.append("</s>")
        elif s == "? (question mark)":
            seq.append("?")
        elif s == ". (dot)":
            seq.append(".")

    # A value of 0 disables the parameter; only one of top_k and top_p can be set (top_p takes precedence)
    if top_k == 0:
        top_k = None
    if top_p == 0:
        top_p = None
    if top_p and top_k:
        top_k = None

    prompt2 = prompt
    output = ''

    # Render the prompt in the UI immediately rather than waiting
    # for the generator to return its first result
    yield [prompt2, output]

    try:
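        # Request tokens one at a time (max_new_tokens=1) and stream each
        # piece to the UI as soon as it arrives.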
        for out in client.generate(prompt,
                    max_new_tokens=1,
                    do_sample=do_sample,
                    temperature=temperature,
                    top_k=top_k,
                    top_p=top_p,
                    extra_stop_sequences=seq
            ):

            output += out
            if copy_output:
                prompt2 += out

            yield [prompt2, output]
    except Exception:
        print(traceback.format_exc())
        yield [prompt, "Error: " + traceback.format_exc()]
        return

with gr.Blocks() as iface:
    gr.Markdown("""# Petals playground
            **Let's play with prompts and inference settings for BLOOM and BLOOMZ 176B models!**

            This space uses the websocket API of [chat.petals.ml](http://chat.petals.ml). The health status of the Petals network [lives here](http://health.petals.ml).

            Do NOT talk to BLOOM as an entity; it's not a chatbot but a webpage/blog/article completion model.
            For the best results: MIMIC a few sentences of a webpage similar to the content you want to generate.

            BLOOMZ performs better in chat mode and follows instructions more closely.""")

    with gr.Row():
        model = gr.Radio(["bloom", "bloomz", "bloom-7b1"], value='bloom', label="Use model")

        # Additional end sequences at which generation should stop
        endseq = gr.CheckboxGroup(["\\n", "</s>", "? (question mark)", ". (dot)"],
            value=["\\n", "</s>"], label='Extra end sequences')

        # Maximum length of the inference session, in tokens
        max_length = gr.Radio([64, 128, 256, 512, 1024, 2048], value=256, interactive=True, label="Max length")

    with gr.Row():
        with gr.Column():
            # Switch between sampling and greedy generation
            do_sample = gr.Checkbox(value=True, interactive=True, label="do_sample")

            # Should the app append the stop token to the end of the prompt, or leave the prompt open-ended?
            add_stoptoken = gr.Checkbox(value=True, interactive=True, label="Automatically add eos token to the prompt.")

        # Only one of top_k and top_p can be set. Requires "do_sample=True" to work.
        top_k = gr.Number(value=0, precision=0, interactive=True, label="top_k")
        top_p = gr.Number(value=0.9, precision=2, interactive=True, label="top_p")

        # Generation temperature
        temperature = gr.Number(value=0.75, precision=2, interactive=True, label="Temperature")

    prompt = gr.Textbox(lines=2, label='Prompt', placeholder="Prompt Here...")

    with gr.Row():
        button_generate = gr.Button("Generate")
        # button_stop = gr.Button("Stop") # TODO, not supported by websocket API yet.

        # Automatically append the generated output to the end of the prompt
        copy_output = gr.Checkbox(label="Output -> Prompt")

    output = gr.Textbox(lines=3, label='Output')

    button_generate.click(generate, inputs=[prompt, model, endseq,
            max_length, do_sample, top_k, top_p, temperature, add_stoptoken, copy_output], outputs=[prompt, output])

    examples = gr.Examples(inputs=[prompt, model, do_sample, top_k, top_p, temperature, add_stoptoken],
        examples=[
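        # Each row: [prompt, model, do_sample, top_k, top_p, temperature, add_stoptoken]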
        ["The SQL command to extract all the users whose name starts with A is: ", "bloom", False, 0, 0, 1, False],
        ["The Spanish translation of thank you for your help is: ", "bloom", False, 0, 0, 1, False],
        ["A human talks to a powerful AI that follows the human's instructions.</s>\n"
         "Human: Hi!</s>\n"
         "AI: Hi! How can I help you?</s>\n"
         "Human: What's the capital of Portugal?</s>\n"
         "AI: ", "bloomz", True, 0, 0.9, 0.75, False]
        ])

iface.queue()
iface.launch()