import itertools
import gradio as gr
import requests
import os


def respond(message, history):
    """Send the conversation to the model serving endpoint and return its answer.

    The endpoint URL and bearer token are read from the ``API_ENDPOINT`` and
    ``API_TOKEN`` environment variables on every call.

    Args:
        message: The new user question (a string).
        history: Previous turns as an iterable of iterables; they are
            flattened in order into a single list that precedes ``message``
            in the prompt.

    Returns:
        The ``"predictions"`` field of the endpoint's JSON response on
        success, otherwise a human-readable string starting with ``"ERROR"``.
    """
    if not message.strip():
        return "ERROR the question should not be empty"

    # Report missing configuration the same way as every other failure in
    # this function instead of surfacing a raw KeyError to the caller.
    local_token = os.environ.get('API_TOKEN')
    local_endpoint = os.environ.get('API_ENDPOINT')
    if not local_token or not local_endpoint:
        return ("ERROR the API_TOKEN and API_ENDPOINT environment variables "
                "must be set")

    # The API token is sent as a bearer token.
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {local_token}'
    }

    # Flatten the history turns into one list and append the new question.
    prompt = list(itertools.chain.from_iterable(history))
    prompt.append(message)
    payload = {"inputs": [prompt]}

    # Transport failures (timeout, connection error, invalid URL) leave no
    # response object to inspect, so they are handled separately from a
    # malformed or unexpected response body.
    try:
        response = requests.post(
            local_endpoint, json=payload, headers=headers, timeout=100)
    except requests.RequestException as err:
        return f"ERROR request failed: {err}"

    try:
        return response.json()["predictions"]
    except (ValueError, KeyError, TypeError):
        # Body is not JSON, or it has no "predictions" entry (typically an
        # error payload): report what the endpoint actually returned.
        return ("ERROR status_code:" + str(response.status_code)
                + " response:" + response.text)


# Static text shown in the chat UI.
_TITLE = "Databricks LLM RAG demo - Chat with llama2 Databricks model serving endpoint"
_DESCRIPTION = (
    "This chatbot is a demo example for the dbdemos llm chatbot. <br>"
    "This content is provided as a LLM RAG educational example, without support. "
    "It is using llama2, can hallucinate and should not be used as production content.<br>"
    "Please review our dbdemos license and terms for more details."
)
# Each example is a one-element list holding a sample question.
_EXAMPLE_QUESTIONS = [
    ["How can I start a Databricks cluster?"],
    ["What is a Databricks Cluster Policy?"],
]

# Widgets handed to the chat interface: the conversation pane and the
# question input box.
_chat_history_view = gr.Chatbot(height=400)
_question_box = gr.Textbox(
    placeholder="Ask me a question",
    container=False,
    scale=7,
)

# Chat application that routes every submitted question through `respond`.
# Retry and undo are passed as None; the clear button is labelled "Clear".
demo = gr.ChatInterface(
    respond,
    chatbot=_chat_history_view,
    textbox=_question_box,
    title=_TITLE,
    description=_DESCRIPTION,
    examples=_EXAMPLE_QUESTIONS,
    cache_examples=False,
    theme="soft",
    retry_btn=None,
    undo_btn=None,
    clear_btn="Clear",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()