import gradio as gr
import requests
import os


def respond(message, history, endpoint, token, temp, max_token=0):
    if len(message.strip()) == 0:
        return "ERROR the question should not be empty"

    # Use the custom endpoint/token from the UI if provided, otherwise fall back to env vars
    if len(token.strip()) > 0 and len(endpoint.strip()) > 0:
        local_token = token
        local_endpoint = endpoint
        custom_message = "[*] "
    else:
        local_token = os.environ['API_TOKEN']
        local_endpoint = os.environ['API_ENDPOINT']
        custom_message = ""

    # Add the API token to the request headers
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {local_token}'
    }
    # Wrap the user message in the instruction-following prompt template expected by the model
    prompt = ("Below is an instruction that describes a task. "
              "Write a response that appropriately completes the request."
              "\n\n### Instruction:\n" + message + "\n\n### Response:\n")
    q = {"inputs": {"prompt": [prompt],
                    "max_tokens": [int(max_token)],
                    "temperature": [float(temp)]}}

    try:
        response = requests.post(local_endpoint, json=q, headers=headers, timeout=100)
        response_data = response.json()["predictions"][0]["candidates"][0]["text"]
    except requests.exceptions.RequestException as e:
        # The request itself failed (timeout, connection error, ...), so there is no response object
        response_data = "ERROR request failed: " + str(e)
    except (KeyError, IndexError, ValueError):
        # The endpoint answered, but not with the expected JSON structure
        response_data = ("ERROR status_code:" + str(response.status_code)
                         + " response:" + response.text)

    return custom_message + response_data


demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(height=400),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chat with a Databricks LLM serving endpoint",
    description="This is an MPT-7b model",
    examples=[["Hello"], ["What is MLflow?"], ["What is Apache Spark?"]],
    cache_examples=False,
    theme="soft",
    retry_btn=None,
    undo_btn=None,
    clear_btn="Clear",
    additional_inputs=[
        gr.Textbox(label="Custom Endpoint", type="text",
                   placeholder="https://XXXXXX.cloud.databricks.com/serving-endpoints/XXXXX/invocations"),
        gr.Textbox(label="Custom Token", type="password", placeholder="dapiXXXXXXXXXX"),
        # Sampling temperature is a float; a 0-1 slider keeps it in the usual range
        gr.Slider(0, 1, label="Temperature", value=0),
        gr.Slider(0, 100, label="Max tokens", value=75)
    ],
    additional_inputs_accordion_name="Settings"
)

if __name__ == "__main__":
    demo.launch()