"""Gradio chat demo that streams replies from a hosted chat-completions endpoint.

The UI collects a system prompt and sampling settings, forwards the
conversation to ``API_URL`` as a streaming request, and renders the reply
incrementally as tokens arrive.
"""

import json
import os
from collections.abc import Iterable, Iterator

import gradio as gr
import requests

API_URL = "https://api.whaleflux.com/whaleflux/v1/model/deployment/enova-service-8fbf8085-2d13-4583/v1/chat/completions"

# NOTE(security): a credential committed to source control should be treated
# as leaked -- rotate it and supply the replacement through the
# WHALEFLUX_API_TOKEN environment variable. The literal is kept only as a
# fallback so existing deployments keep working until that happens.
API_TOKEN = os.environ.get(
    "WHALEFLUX_API_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyaWQiOiJNVGMwTlRVMk5EVTROaTR4T0dNd01qUXpaVEJsTVRsaVpURmhPV1V5TkdVMk9UUTRabVppTjJNME16RmtaVGt4WkRjM056RmtPR1l4TTJFek1HRmpNek15WW1JMFlUTmpPVEUwIiwiaWF0IjoxNzQ1NTY0NTg2LCJleHAiOi0xLCJvcmdfaWQiOiIxMDAyNzA5NSIsInNjb3BlIjp7InBlcm1pc3Npb24iOm51bGx9LCJ0eXBlIjoiYXBpLXRva2VuIiwiTWFwQ2xhaW1zIjpudWxsfQ.fw6eZmOWr7gBqKd6X5duGao0MOimZ69Fv0oeBVWy0Gk",
)

# (connect, read) timeouts in seconds. The read timeout bounds the gap between
# streamed chunks, not the total generation time, so it can stay generous.
REQUEST_TIMEOUT = (10, 300)


def _build_messages(system_message, history, message):
    """Return the chat-completions ``messages`` list for one request.

    The system prompt comes first, followed by every non-empty user/assistant
    entry from ``history`` (a sequence of ``(user, assistant)`` pairs), and
    finally the new user ``message``.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})
    return messages


def _iter_stream_tokens(lines: Iterable[bytes]) -> Iterator[str]:
    """Yield the non-empty ``delta.content`` strings found in streamed lines.

    Each line is expected to look like ``data: {json}``. Blank lines and lines
    that are not valid JSON are skipped, and a ``[DONE]`` payload ends the
    iteration.
    """
    for raw_line in lines:
        if not raw_line:
            continue
        try:
            text = raw_line.decode("utf-8")
        except UnicodeDecodeError:
            continue
        # Strip only the leading field name. Replacing "data: " everywhere
        # would also delete that substring from the generated text itself.
        payload = text.removeprefix("data:").strip()
        if payload == "[DONE]":
            return
        try:
            event = json.loads(payload)
        except json.JSONDecodeError:
            continue
        if not isinstance(event, dict):
            continue
        choices = event.get("choices")
        if not choices:
            continue
        delta = choices[0].get("delta") or {}
        token = delta.get("content")
        if token:
            yield token


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply for ``message``.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user, assistant)`` pairs.
        system_message: Text sent as the system prompt.
        max_tokens: Upper bound on generated tokens, sent as ``max_tokens``.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling threshold forwarded to the API.

    Yields:
        The reply accumulated so far, once per received content token, so the
        UI can re-render the growing message.

    Raises:
        gr.Error: If the HTTP request fails or returns an error status.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}",
    }
    data = {
        "model": "/data/DMind-1-mini",
        "stream": True,
        "messages": _build_messages(system_message, history, message),
        # Previously the slider value was collected but never sent.
        "max_tokens": int(max_tokens),
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
    }

    response = ""
    try:
        with requests.post(
            API_URL,
            headers=headers,
            json=data,
            stream=True,
            timeout=REQUEST_TIMEOUT,
        ) as r:
            # Surface HTTP errors instead of silently producing an empty reply.
            r.raise_for_status()
            for token in _iter_stream_tokens(r.iter_lines()):
                response += token
                yield response
    except requests.RequestException as exc:
        raise gr.Error(f"Request to the model API failed: {exc}") from exc


# For information on how to customize the ChatInterface, see the Gradio docs:
# https://www.gradio.app/docs/chatinterface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.96,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()