import json
import gradio as gr
import os
import requests
from huggingface_hub import AsyncInferenceClient
HF_TOKEN = os.getenv('HF_TOKEN')
api_url = os.getenv('API_URL')
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
client = AsyncInferenceClient(api_url)
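# A quick endpoint sanity check (illustrative sketch, not part of the app flow;
# assumes API_URL points at a running text-generation-inference server):
#
#   import asyncio
#   print(asyncio.run(client.text_generation("Hello", max_new_tokens=5)))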
# Kept for reference; not currently passed to the model, since the default system prompt was removed (see note below)
system_message = "\nYou are a helpful, respectful and honest Excel formula assistant. Always answer as helpfully as possible, while being safe."
title = "Excel Bot"
description = """
This is an Excel Assistant AI.
Note: Derivative work of [Llama-2-70b-chat](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) by Meta.
"""
css = """.toast-wrap { display: none !important } """
examples = [
    ['Write an Excel formula to sum numbers in a row.'],
    ["Write an Excel formula to generate a random number."],
]
# Note: the default system prompt has been removed, as requested by the paper authors [Dated: 13/Oct/2023]
# Prompting style for Llama 2 without a system prompt:
# <s>[INST] {{ user_msg_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_msg_2 }} [/INST]
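# For example, a one-turn history plus a new user message would be assembled
# as (illustrative values):
# <s>[INST] Sum column A [/INST] Use =SUM(A:A) </s><s>[INST] Now a random number [/INST]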
# Stream text: stream tokens from TGI with the AsyncInferenceClient
async def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
    if system_prompt != "":
        input_prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n "
    else:
        input_prompt = "<s>[INST] "

    # Clamp sampling parameters to safe values
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    # Rebuild the full multi-turn prompt from the chat history
    for interaction in chatbot:
        input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s>[INST] "
    input_prompt = input_prompt + str(message) + " [/INST] "

    partial_message = ""
    async for token in await client.text_generation(prompt=input_prompt,
                                                    max_new_tokens=max_new_tokens,
                                                    stream=True,
                                                    best_of=1,
                                                    temperature=temperature,
                                                    top_p=top_p,
                                                    do_sample=True,
                                                    repetition_penalty=repetition_penalty):
        partial_message = partial_message + token
        yield partial_message
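# Illustrative manual use of the streaming generator above (Gradio drives it
# through the ChatInterface below; this sketch is not part of the app flow):
#
#   import asyncio
#   async def _demo():
#       async for partial in predict("Write a SUM formula.", []):
#           print(partial)
#   asyncio.run(_demo())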
# No stream: produce the full response in a single call to the TGI inference endpoint
def predict_batch(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
    if system_prompt != "":
        input_prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n "
    else:
        input_prompt = "<s>[INST] "

    # Clamp sampling parameters to safe values
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    # Rebuild the full multi-turn prompt from the chat history
    for interaction in chatbot:
        input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s>[INST] "
    input_prompt = input_prompt + str(message) + " [/INST] "
    print(f"input_prompt - {input_prompt}")

    data = {
        "inputs": input_prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "repetition_penalty": repetition_penalty,
            "do_sample": True,
        },
    }

    response = requests.post(api_url, headers=headers, json=data)
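    # On success, TGI's /generate route typically returns a list like
    #   [{"generated_text": "..."}]
    # while failures may carry an "error" field instead (sometimes as a bare
    # dict). The parsing below reflects what this app expects, not a
    # guaranteed schema.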
    if response.status_code == 200:  # check if the request was successful
        try:
            json_obj = response.json()
            # TGI may return a bare dict for errors; normalize to a list
            if isinstance(json_obj, dict):
                json_obj = [json_obj]
            if 'generated_text' in json_obj[0] and len(json_obj[0]['generated_text']) > 0:
                return json_obj[0]['generated_text']
            elif 'error' in json_obj[0]:
                return json_obj[0]['error'] + ' Please refresh and try again with a smaller input prompt.'
            else:
                print(f"Unexpected response: {json_obj[0]}")
                return "Unexpected response from the endpoint; please try again."
        except json.JSONDecodeError:
            print(f"Failed to decode response as JSON: {response.text}")
            return "Could not decode the endpoint response; please try again."
    else:
        print(f"Request failed with status code {response.status_code}")
        return f"Request failed with status code {response.status_code}; please try again."
def vote(data: gr.LikeData):
    if data.liked:
        print(f"You upvoted this response: {data.value}")
    else:
        print(f"You downvoted this response: {data.value}")
additional_inputs = [
    gr.Textbox("", label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=4096,
        step=64,
        interactive=True,
        info="The maximum number of new tokens to generate",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.6,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]
chatbot_stream = gr.Chatbot(avatar_images=('user.png', 'bot2.png'), bubble_full_width=False)
chatbot_batch = gr.Chatbot(avatar_images=('user1.png', 'bot1.png'), bubble_full_width=False)
chat_interface_stream = gr.ChatInterface(
    predict,
    title=title,
    description=description,
    textbox=gr.Textbox(),
    chatbot=chatbot_stream,
    css=css,
    examples=examples,
    # cache_examples=True,
    additional_inputs=additional_inputs,
)
chat_interface_batch = gr.ChatInterface(
    predict_batch,
    title=title,
    description=description,
    textbox=gr.Textbox(),
    chatbot=chatbot_batch,
    css=css,
    examples=examples,
    # cache_examples=True,
    additional_inputs=additional_inputs,
)
# Gradio Demo
with gr.Blocks() as demo:
    with gr.Tab("Streaming"):
        # streaming chatbot
        chatbot_stream.like(vote, None, None)
        chat_interface_stream.render()
    # with gr.Tab("Batch"):
    #     # non-streaming chatbot
    #     chatbot_batch.like(vote, None, None)
    #     chat_interface_batch.render()

demo.queue(max_size=100).launch()