import gradio as gr
from huggingface_hub import InferenceClient
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Update: using a new base model
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
dataset = load_dataset("JustKiddo/KiddosVault")  # loaded at startup; not referenced below

# Load the tokenizer and model used only for the token-display panel
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")  # Google's T5 model
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
messages = [{"role": "system", "content": system_message}]
for val in history:
if val[0]:
messages.append({"role": "user", "content": val[0]})
if val[1]:
messages.append({"role": "assistant", "content": val[1]})
messages.append({"role": "user", "content": message})
response = ""
for message in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
token = message.choices[0].delta.content
response += token
yield response
# My custom token generator
def generate_tokens(text):
    inputs = tokenizer(text, return_tensors="pt")  # renamed to avoid shadowing the built-in `input`
    output = model.generate(**inputs)
    input_ids = inputs["input_ids"].tolist()[0]
    output_ids = output.tolist()[0]
    formatted_output = [format(x, "d") for x in output_ids]
    input_tokens_str = tokenizer.convert_ids_to_tokens(input_ids)
    return " ".join(input_tokens_str), " ".join(formatted_output)
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
with gr.Blocks() as demo:
    with gr.Column():
        gr.ChatInterface(
            respond,
            additional_inputs=[
                gr.Textbox(value="You are a professional Mental Healthcare Chatbot.", label="System message"),
                gr.Slider(minimum=1, maximum=6144, value=6144, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=1, step=0.1, label="Temperature"),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
        )
    with gr.Row():
        input_text = gr.Textbox(label="Input text")
        input_tokens = gr.Textbox(label="Input tokens")
        output_ids = gr.Textbox(label="Output tokens")

        def update_tokens(input_text):
            input_tokens_str, output_ids = generate_tokens(input_text)
            return input_tokens_str, output_ids

        input_text.change(update_tokens,
                          inputs=input_text,
                          outputs=[input_tokens, output_ids])
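        # Note (assumption): .change fires on every keystroke, so model.generate
        # runs per edit; input_text.submit would be a lighter-weight trigger.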
if __name__ == "__main__":
    demo.launch(debug=True)