import gradio as gr
import os
import requests
import time

css = """
.gradio-container { background-color: #1e1e2f; color: white; max-width: 800px !important; margin: auto; padding-top: 50px; }
h1 { text-align: center; font-size: 2em; margin-bottom: 20px; }
footer { visibility: hidden; }
select { background-color: #2a2a40; color: white; padding: 6px 10px; border-radius: 8px; border: 1px solid #444; width: 300px; }
option { background-color: #2a2a40; color: white; }
"""

ACCESS_TOKEN = os.getenv("HF_TOKEN")
API_URL = "https://api.deepinfra.com/v1/openai/chat/completions"
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {ACCESS_TOKEN}",
}

# System prompts for each character.
PROMPTS = {
    "Elon Ma (Official)": (
        "You are Elon Ma, a Chinese car salesman selling the Edision Model S.\n"
        "Respond in broken English, overhyping the car, never mentioning Tesla."
    ),
    "Cole (Community)": (
        "You are Cole, a Gen Z troll who sells Edision Model S cars.\n"
        "You type like you're on TikTok, casually roasting the user."
    ),
    "Mr. Shortreed (Official)": (
        "You are Mr. Shortreed, a serious teacher explaining the Edision Model S.\n"
        "You use formal, educational language."
    ),
}


def stream_response(message, history, character):
    """Call the API and yield partial responses for streaming."""
    system_message = PROMPTS.get(character, "")

    # Build the message list in OpenAI chat format:
    # system prompt, prior turns, then the new user message.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    payload = {
        "model": "mistralai/Mistral-Small-24B-Instruct-2501",
        "messages": messages,
        "max_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.95,
    }

    try:
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
        response.raise_for_status()
        data = response.json()
        if "choices" not in data:
            # Surface the raw response body for debugging.
            yield f"Error: API returned an unexpected response: {data}"
            return
        content = data["choices"][0]["message"]["content"]

        # Simulate streaming by yielding the reply word by word.
        partial_text = ""
        for token in content.split():
            partial_text += token + " "
            time.sleep(0.02)
            yield partial_text.strip()
    except Exception as e:
        yield f"Error: {e}"


def chat(user_message, history, character):
    """
    Append the user message to the conversation history, then stream the
    assistant's reply. Yields (chatbot, state) pairs so the visible chat
    and the stored history stay in sync.
    """
    # Ensure history is a list and never mutate the caller's copy.
    history = (history or []).copy()
    prior_turns = list(history)  # conversation before this turn
    history.append({"role": "user", "content": user_message})

    full_response = ""
    for partial in stream_response(user_message, prior_turns, character):
        full_response = partial
        # Show the in-progress assistant reply under the updated history.
        yield history + [{"role": "assistant", "content": full_response}], history

    # Commit the final assistant message to the stored history.
    history.append({"role": "assistant", "content": full_response})
    yield history, history


def clean_choice(choice):
    """Map a dropdown choice back to its PROMPTS key."""
    if "Elon" in choice:
        return "Elon Ma (Official)"
    if "Cole" in choice:
        return "Cole (Community)"
    if "Shortreed" in choice:
        return "Mr. Shortreed (Official)"
    return "Elon Ma (Official)"


with gr.Blocks(css=css) as demo:
    # Header with the QClone Public label (styled by the h1 rule above).
    gr.HTML("<h1>QClone Public</h1>")

    with gr.Row():
        with gr.Column(scale=1):
            # Dropdown for model selection (narrow column).
            model_dropdown = gr.Dropdown(
                choices=[
                    "Elon Ma (Official) 🟡 - Broken English salesman",
                    "Cole (Community) 🔵 - Gen Z slang troll",
                    "Mr. Shortreed (Official) 🟡 - Serious teacher vibes",
                ],
                value="Elon Ma (Official) 🟡 - Broken English salesman",
                label="Model",
            )
        with gr.Column(scale=3):
            # Chatbot component to display the conversation.
            # type="messages" (Gradio 4+) makes it accept the
            # {"role": ..., "content": ...} dicts built above.
            chatbot = gr.Chatbot(label="QClone Chat", type="messages")
            # Textbox for user input.
            msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")

    # State holding the conversation history.
    state = gr.State([])

    # A named generator (not a lambda) so Gradio detects it as a generator
    # function and streams each yielded update to the outputs.
    def respond(user_message, history, choice):
        yield from chat(user_message, history, clean_choice(choice))

    # When the user submits text, update the chat.
    msg.submit(
        fn=respond,
        inputs=[msg, state, model_dropdown],
        outputs=[chatbot, state],
        show_progress=True,
    )

demo.launch(share=True)