import json
import gradio as gr
import os
import requests
from huggingface_hub import AsyncInferenceClient
HF_TOKEN = os.getenv('HF_TOKEN')
api_url = os.getenv('API_URL')
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
client = AsyncInferenceClient(api_url)
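# A quick endpoint sanity check (illustrative sketch, not part of the app flow;
# assumes API_URL points at a running text-generation-inference server):
#
#   import asyncio
#   print(asyncio.run(client.text_generation("Hello", max_new_tokens=5)))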
# Kept for reference; not currently passed to the model, since the default system prompt was removed (see note below)
system_message = "\nYou are a helpful, respectful and honest Excel formula assistant. Always answer as helpfully as possible, while being safe."
title = "Excel Bot"
description = """
This is an Excel Assistant AI.
Note: Derivative work of [Llama-2-70b-chat](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) by Meta.
"""
css = """.toast-wrap { display: none !important } """
examples = [
    ['Write an Excel formula to sum numbers in a row.'],
    ["Write an Excel formula to generate a random number."],
]
# Note: the default system prompt has been removed, as requested by the paper authors [Dated: 13/Oct/2023]
# Prompting style for Llama 2 without a system prompt:
# <s>[INST] {{ user_msg_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_msg_2 }} [/INST]
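# For example, a one-turn history plus a new user message would be assembled
# as (illustrative values):
# <s>[INST] Sum column A [/INST] Use =SUM(A:A) </s><s>[INST] Now a random number [/INST]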
# Stream text: stream tokens from TGI with the AsyncInferenceClient
async def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
    if system_prompt != "":
        input_prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n "
    else:
        input_prompt = "<s>[INST] "

    # Clamp sampling parameters to safe values
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    # Rebuild the full multi-turn prompt from the chat history
    for interaction in chatbot:
        input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s>[INST] "
    input_prompt = input_prompt + str(message) + " [/INST] "

    partial_message = ""
    async for token in await client.text_generation(prompt=input_prompt,
                                                    max_new_tokens=max_new_tokens,
                                                    stream=True,
                                                    best_of=1,
                                                    temperature=temperature,
                                                    top_p=top_p,
                                                    do_sample=True,
                                                    repetition_penalty=repetition_penalty):
        partial_message = partial_message + token
        yield partial_message
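# Illustrative manual use of the streaming generator above (Gradio drives it
# through the ChatInterface below; this sketch is not part of the app flow):
#
#   import asyncio
#   async def _demo():
#       async for partial in predict("Write a SUM formula.", []):
#           print(partial)
#   asyncio.run(_demo())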
# No stream: produce the full response in a single call to the TGI inference endpoint
def predict_batch(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
    if system_prompt != "":
        input_prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n "
    else:
        input_prompt = "<s>[INST] "

    # Clamp sampling parameters to safe values
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    # Rebuild the full multi-turn prompt from the chat history
    for interaction in chatbot:
        input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s>[INST] "
    input_prompt = input_prompt + str(message) + " [/INST] "
    print(f"input_prompt - {input_prompt}")

    data = {
        "inputs": input_prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "repetition_penalty": repetition_penalty,
            "do_sample": True,
        },
    }

    response = requests.post(api_url, headers=headers, json=data)
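    # On success, TGI's /generate route typically returns a list like
    #   [{"generated_text": "..."}]
    # while failures may carry an "error" field instead (sometimes as a bare
    # dict). The parsing below reflects what this app expects, not a
    # guaranteed schema.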
    if response.status_code == 200:  # check if the request was successful
        try:
            json_obj = response.json()
            # TGI may return a bare dict for errors; normalize to a list
            if isinstance(json_obj, dict):
                json_obj = [json_obj]
            if 'generated_text' in json_obj[0] and len(json_obj[0]['generated_text']) > 0:
                return json_obj[0]['generated_text']
            elif 'error' in json_obj[0]:
                return json_obj[0]['error'] + ' Please refresh and try again with a smaller input prompt.'
            else:
                print(f"Unexpected response: {json_obj[0]}")
                return "Unexpected response from the endpoint; please try again."
        except json.JSONDecodeError:
            print(f"Failed to decode response as JSON: {response.text}")
            return "Could not decode the endpoint response; please try again."
    else:
        print(f"Request failed with status code {response.status_code}")
        return f"Request failed with status code {response.status_code}; please try again."
def vote(data: gr.LikeData):
    if data.liked:
        print(f"You upvoted this response: {data.value}")
    else:
        print(f"You downvoted this response: {data.value}")
additional_inputs = [
    gr.Textbox("", label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=4096,
        step=64,
        interactive=True,
        info="The maximum number of new tokens to generate",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.6,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]
chatbot_stream = gr.Chatbot(avatar_images=('user.png', 'bot2.png'), bubble_full_width=False)
chatbot_batch = gr.Chatbot(avatar_images=('user1.png', 'bot1.png'), bubble_full_width=False)
chat_interface_stream = gr.ChatInterface(
    predict,
    title=title,
    description=description,
    textbox=gr.Textbox(),
    chatbot=chatbot_stream,
    css=css,
    examples=examples,
    # cache_examples=True,
    additional_inputs=additional_inputs,
)
chat_interface_batch = gr.ChatInterface(
    predict_batch,
    title=title,
    description=description,
    textbox=gr.Textbox(),
    chatbot=chatbot_batch,
    css=css,
    examples=examples,
    # cache_examples=True,
    additional_inputs=additional_inputs,
)
# Gradio Demo
with gr.Blocks() as demo:
    with gr.Tab("Streaming"):
        # streaming chatbot
        chatbot_stream.like(vote, None, None)
        chat_interface_stream.render()
    # with gr.Tab("Batch"):
    #     # non-streaming chatbot
    #     chatbot_batch.like(vote, None, None)
    #     chat_interface_batch.render()

demo.queue(max_size=100).launch()