import gradio as gr
from huggingface_hub import InferenceClient
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# Update: using a new base model
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# Loaded for later use; the dataset is not referenced anywhere else in this app yet.
dataset = load_dataset("JustKiddo/KiddosVault")
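
# For reference, a hedged sketch of a one-off, non-streaming chat_completion
# call against the same client, per the huggingface_hub docs linked above
# (never invoked by the app; the prompt text is illustrative only):
def _example_single_completion():
    result = client.chat_completion(
        [{"role": "user", "content": "Say hello in one sentence."}],
        max_tokens=32,
    )
    return result.choices[0].message.content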

# Load the tokenizer and model used only for the token-display demo below
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")  # Google's T5-small
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # Stream the completion; each chunk carries a delta holding new token text.
    # The loop variable is named `chunk` so it no longer shadows the `message`
    # parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content

        # The final streamed chunk may carry no content; skip it so None is
        # never concatenated into the response.
        if token:
            response += token
        yield response
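
# `respond` is a generator: Gradio's ChatInterface consumes it to stream the
# reply. A hedged sketch of draining it manually (never invoked here; the
# argument values are illustrative only):
def _example_drain_respond():
    final = ""
    for partial in respond("Hello!", [], "You are helpful.", 128, 0.7, 0.95):
        final = partial  # each yield is the full response accumulated so far
    return final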

# My custom token generator
def generate_tokens(text):
    inputs = tokenizer(text, return_tensors="pt")  # renamed from `input`, which shadows the built-in
    output = model.generate(**inputs)

    input_ids = inputs["input_ids"].tolist()[0]
    output_ids = output.tolist()[0]

    # Render the generated ids as space-separated decimal strings.
    formatted_output = [format(x, "d") for x in output_ids]

    input_tokens_str = tokenizer.convert_ids_to_tokens(input_ids)
    # output_tokens_str = tokenizer.convert_ids_to_tokens(output_ids)

    return " ".join(input_tokens_str), " ".join(formatted_output)

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""


with gr.Blocks() as demo:
    with gr.Column():
        gr.ChatInterface(
            respond,
            additional_inputs=[
                gr.Textbox(value="You are a professional Mental Healthcare Chatbot.", label="System message"),
                gr.Slider(minimum=1, maximum=6144, value=6144, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=1, step=0.1, label="Temperature"),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
        )
        
    with gr.Row():
        input_text = gr.Textbox(label="Input text")
        input_tokens = gr.Textbox(label="Input tokens")
        output_ids = gr.Textbox(label="Output token ids")

        # Re-tokenize whenever the input box changes; generate_tokens already
        # returns the two strings in the right order, so no wrapper is needed.
        input_text.change(generate_tokens,
                          inputs=input_text,
                          outputs=[input_tokens, output_ids])

if __name__ == "__main__":
    demo.launch(debug=True)