"""Gradio chat UI that streams replies from a DeepInfra-hosted chat model.

The app talks to DeepInfra through the OpenAI-compatible client and always
uses the fixed persona defined in ``SYSTEM_PROMPT``.
"""

import os
from collections.abc import Iterator

import gradio as gr
from openai import OpenAI

# Page styling passed to gr.ChatInterface: constrains the container width,
# centres h1 headings and hides the footer.
css = (
    " .gradio-container{max-width: 1000px !important}"
    " h1{text-align:center}"
    " footer { visibility: hidden } "
)

# The API key is read from the HF_TOKEN environment variable; it is None when
# the variable is unset.
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# NOTE: base_url must be the API root, not the full endpoint. The OpenAI client
# appends "/chat/completions" itself, so including it here would produce a
# doubled path and every request would fail.
client = OpenAI(
    base_url="https://api.deepinfra.com/v1/openai",
    api_key=ACCESS_TOKEN,
)

# Fixed persona sent as the system message on every request.
SYSTEM_PROMPT = (
    "From now on respond as a sale man for a car that is a ripoff of a tesla "
    "called an edision model s, Respond in broken english "
    "The company is Edision Motors and you never mention tesla "
    "and the car is made of cardborard and ducktape but you dont mention it "
    'When you understand say "Ready" '
    "Your name is Elon Ma and your Chinese "
    "There is also evil donald ducks and mr long reeds trying to vandlize the "
    'cars calling them nazi cars or "sawasticars" . '
)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
) -> Iterator[str]:
    """Stream the assistant's reply to ``message``.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; empty
            or ``None`` halves of a pair are skipped.
        system_message: Accepted for interface compatibility with the
            "System message" textbox but ignored: ``SYSTEM_PROMPT`` is always
            used instead.
        max_tokens: Forwarded to the completion request as ``max_tokens``.
        temperature: Forwarded to the completion request as ``temperature``.
        top_p: Forwarded to the completion request as ``top_p``.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        new text.
    """
    # Apply the structured system prompt (deliberately overrides the argument).
    system_message = SYSTEM_PROMPT

    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model="Qwen/Qwen2.5-72B-Instruct",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    )

    response = ""
    # The loop variable is named ``chunk`` so it does not shadow ``message``.
    for chunk in stream:
        # Some chunks carry no choices at all; skip them instead of raising
        # IndexError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Role-only and final chunks have content None; concatenating that to
        # a str would raise TypeError.
        if token:
            response += token
            yield response


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P",
        ),
    ],
    css=css,
)


if __name__ == "__main__":
    demo.launch()