"""Gradio chat UI that forwards the conversation to an HTTP model endpoint.

The endpoint URL and bearer token are read from the ``API_ENDPOINT`` and
``API_TOKEN`` environment variables each time a message is sent.
"""

import itertools
import os

import gradio as gr
import requests

# Upper bound, in seconds, on how long a single endpoint call may take.
REQUEST_TIMEOUT_SECONDS = 100


def respond(message: str, history: list):
    """Send the conversation to the serving endpoint and return its reply.

    Args:
        message: The text the user just submitted.
        history: Previous turns as supplied by ``gr.ChatInterface``; it is
            flattened one level, so it is expected to be an iterable of
            iterables (presumably ``[user, bot]`` pairs -- confirm against
            the Gradio version in use).

    Returns:
        The value stored under ``"predictions"`` in the endpoint's JSON
        response, or a string starting with ``"ERROR"`` when the message is
        blank, the request cannot be completed, or the response is not the
        expected JSON.

    Raises:
        KeyError: If ``API_TOKEN`` or ``API_ENDPOINT`` is not set in the
            environment.
    """
    if not message.strip():
        return "ERROR the question should not be empty"

    local_token = os.environ['API_TOKEN']
    local_endpoint = os.environ['API_ENDPOINT']

    # The API token travels as a bearer credential in the headers.
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {local_token}',
    }

    # Flatten the history one level and append the new message, so the
    # endpoint receives the whole conversation as a single flat list.
    prompt = list(itertools.chain.from_iterable(history))
    prompt.append(message)
    payload = {"inputs": [prompt]}

    # Transport failures (timeout, DNS, refused connection) leave no response
    # object to inspect, so they are reported separately from bad payloads.
    try:
        response = requests.post(
            local_endpoint,
            json=payload,
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return f"ERROR request failed: {exc}"

    # ValueError: body is not JSON; KeyError: no "predictions" key;
    # TypeError: JSON decoded to something that is not a mapping.
    try:
        return response.json()["predictions"]
    except (ValueError, KeyError, TypeError):
        return (
            f"ERROR status_code:{response.status_code}"
            f" response:{response.text}"
        )


# Module-level so that tools importing this file can find the app as `demo`.
demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(height=400),
    textbox=gr.Textbox(
        placeholder="Ask me a question",
        container=False,
        scale=7,
    ),
    title="Chat with a Databricks LLM serving endpoint",
    description="This is an advanced model hosted on Databricks Serving.",
    examples=[["Hello"], ["What is MLflow?"], ["What is Apache Spark?"]],
    cache_examples=False,
    theme="soft",
    retry_btn=None,
    undo_btn=None,
    clear_btn="Clear",
)

if __name__ == "__main__":
    demo.launch()