import gradio as gr
import os
import requests
import time

css = """
.gradio-container { background-color: #1e1e2f; color: white; max-width: 800px !important; margin: auto; padding-top: 50px; }
h1 { text-align: center; font-size: 2em; margin-bottom: 20px; }
footer { visibility: hidden; }
select { background-color: #2a2a40; color: white; padding: 6px 10px; border-radius: 8px; border: 1px solid #444; width: 300px; }
option { background-color: #2a2a40; color: white; }
"""

ACCESS_TOKEN = os.getenv("HF_TOKEN")
API_URL = "https://api.deepinfra.com/v1/openai/chat/completions"
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {ACCESS_TOKEN}",
}

# System prompts for each character.
PROMPTS = {
    "Elon Ma (Official)": (
        "You are Elon Ma, a Chinese car salesman selling the Edision Model S.\n"
        "Respond in broken English, overhyping the car, never mentioning Tesla."
    ),
    "Cole (Community)": (
        "You are Cole, a Gen Z troll who sells Edision Model S cars.\n"
        "You type like you're on TikTok, casually roasting the user."
    ),
    "Mr. Shortreed (Official)": (
        "You are Mr. Shortreed, a serious teacher explaining the Edision Model S.\n"
        "You use formal, educational language."
    ),
}


def stream_response(message, history, character):
    """Call the API and yield partial responses for streaming."""
    system_message = PROMPTS.get(character, "")

    # Build the message list in OpenAI chat format:
    # system prompt, prior turns, then the new user message.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    payload = {
        "model": "mistralai/Mistral-Small-24B-Instruct-2501",
        "messages": messages,
        "max_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.95,
    }

    try:
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
        response.raise_for_status()
        data = response.json()
        if "choices" not in data:
            # Surface the raw response body for debugging.
            yield f"Error: API returned an unexpected response: {data}"
            return
        content = data["choices"][0]["message"]["content"]

        # Simulate streaming by yielding the reply word by word.
        partial_text = ""
        for token in content.split():
            partial_text += token + " "
            time.sleep(0.02)
            yield partial_text.strip()
    except Exception as e:
        yield f"Error: {e}"


def chat(user_message, history, character):
    """
    Append the user message to the conversation history, then stream the
    assistant's reply. Yields (chatbot, state) pairs so the visible chat
    and the stored history stay in sync.
    """
    # Ensure history is a list and never mutate the caller's copy.
    history = (history or []).copy()
    prior_turns = list(history)  # conversation before this turn
    history.append({"role": "user", "content": user_message})

    full_response = ""
    for partial in stream_response(user_message, prior_turns, character):
        full_response = partial
        # Show the in-progress assistant reply under the updated history.
        yield history + [{"role": "assistant", "content": full_response}], history

    # Commit the final assistant message to the stored history.
    history.append({"role": "assistant", "content": full_response})
    yield history, history


def clean_choice(choice):
    """Map a dropdown choice back to its PROMPTS key."""
    if "Elon" in choice:
        return "Elon Ma (Official)"
    if "Cole" in choice:
        return "Cole (Community)"
    if "Shortreed" in choice:
        return "Mr. Shortreed (Official)"
    return "Elon Ma (Official)"


with gr.Blocks(css=css) as demo:
    # Header with the QClone Public label (styled by the h1 rule above).
    gr.HTML("<h1>QClone Public</h1>")

    with gr.Row():
        with gr.Column(scale=1):
            # Dropdown for model selection (narrow column).
            model_dropdown = gr.Dropdown(
                choices=[
                    "Elon Ma (Official) 🟡 - Broken English salesman",
                    "Cole (Community) 🔵 - Gen Z slang troll",
                    "Mr. Shortreed (Official) 🟡 - Serious teacher vibes",
                ],
                value="Elon Ma (Official) 🟡 - Broken English salesman",
                label="Model",
            )
        with gr.Column(scale=3):
            # Chatbot component to display the conversation.
            # type="messages" (Gradio 4+) makes it accept the
            # {"role": ..., "content": ...} dicts built above.
            chatbot = gr.Chatbot(label="QClone Chat", type="messages")
            # Textbox for user input.
            msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")

    # State holding the conversation history.
    state = gr.State([])

    # A named generator (not a lambda) so Gradio detects it as a generator
    # function and streams each yielded update to the outputs.
    def respond(user_message, history, choice):
        yield from chat(user_message, history, clean_choice(choice))

    # When the user submits text, update the chat.
    msg.submit(
        fn=respond,
        inputs=[msg, state, model_dropdown],
        outputs=[chatbot, state],
        show_progress=True,
    )

demo.launch(share=True)