import gradio as gr
import os
import requests
import time
css = """
.gradio-container {
background-color: #1e1e2f;
color: white;
max-width: 800px !important;
margin: auto;
padding-top: 50px;
}
h1 {
text-align: center;
font-size: 2em;
margin-bottom: 20px;
}
footer {
visibility: hidden;
}
select {
background-color: #2a2a40;
color: white;
padding: 6px 10px;
border-radius: 8px;
border: 1px solid #444;
width: 300px;
}
option {
background-color: #2a2a40;
color: white;
}
"""
ACCESS_TOKEN = os.getenv("HF_TOKEN")
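# Note: the secret is read from HF_TOKEN, but it is sent as the bearer token to
# DeepInfra's OpenAI-compatible endpoint below, so it must be a key that
# DeepInfra accepts.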
API_URL = "https://api.deepinfra.com/v1/openai/chat/completions"
HEADERS = {
"Content-Type": "application/json",
"Authorization": f"Bearer {ACCESS_TOKEN}",
}
# Define system prompts for each model.
PROMPTS = {
"Elon Ma (Official)": (
"You are Elon Ma, a Chinese car salesman selling the Edision Model S.\n"
"Respond in broken English, overhyping the car, never mentioning Tesla."
),
"Cole (Community)": (
"You are Cole, a Gen Z troll who sells Edision Model S cars.\n"
"You type like you're on TikTok, casually roasting the user."
),
"Mr. Shortreed (Official)": (
"You are Mr. Shortreed, a serious teacher explaining the Edision Model S.\n"
"You use formal, educational language."
)
}
def stream_response(message, history, character):
"""
Calls the API and yields partial responses for streaming.
"""
system_message = PROMPTS.get(character, "")
    # Build the OpenAI-style message list: system prompt, prior turns, new user turn.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})
payload = {
"model": "mistralai/Mistral-Small-24B-Instruct-2501",
"messages": messages,
"max_tokens": 512,
"temperature": 0.7,
"top_p": 0.95,
}
    try:
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
        response.raise_for_status()
        data = response.json()
        if "choices" not in data:
            # Surface the full response body for debugging.
            yield f"Error: API returned an unexpected response: {data}"
            return
        content = data["choices"][0]["message"]["content"]
        partial_text = ""  # renamed from `stream_response`, which shadowed this function
        # Simulate streaming by yielding the reply word by word
        # (note: splitting collapses the model's original whitespace).
        for token in content.split():
            partial_text += token + " "
            time.sleep(0.02)
            yield partial_text.strip()
    except Exception as e:
        yield f"Error: {str(e)}"
def chat(user_message, history, character):
    """
    Appends the user message to the conversation history, then streams the
    assistant's reply. Yields (chatbot, state) pairs to match the two output
    components wired up below.
    """
    # Ensure history is a list and work on a copy.
    history = (history or []).copy()
    # Append the user's message.
    history.append({"role": "user", "content": user_message})
    full_response = ""
    # Pass history[:-1] so stream_response doesn't duplicate the user turn
    # it appends itself.
    for partial in stream_response(user_message, history[:-1], character):
        full_response = partial
        # Show the conversation with the in-progress assistant reply.
        yield history + [{"role": "assistant", "content": full_response}], history
    # Append the final assistant message and sync the stored state.
    history.append({"role": "assistant", "content": full_response})
    yield history, history
def clean_choice(choice):
"""
Extract the key for PROMPTS from the dropdown choice.
"""
if "Elon" in choice:
return "Elon Ma (Official)"
if "Cole" in choice:
return "Cole (Community)"
if "Shortreed" in choice:
return "Mr. Shortreed (Official)"
return "Elon Ma (Official)"
with gr.Blocks(css=css) as demo:
# Header with QClone Public label.
gr.HTML("<h1>QClone <span style='background-color:#3b82f6;color:white;font-size:0.75em;padding:2px 6px;border-radius:5px;margin-left:8px;'>Public</span></h1>")
with gr.Row():
with gr.Column(scale=1):
# Dropdown for model selection (smaller width).
model_dropdown = gr.Dropdown(
choices=[
"Elon Ma (Official) 🟡 - Broken English salesman",
"Cole (Community) 🔵 - Gen Z slang troll",
"Mr. Shortreed (Official) 🟡 - Serious teacher vibes"
],
value="Elon Ma (Official) 🟡 - Broken English salesman",
label="Model"
)
with gr.Column(scale=3):
            # Chatbot component to display the conversation; type="messages"
            # matches the OpenAI-style role/content dicts built above.
            chatbot = gr.Chatbot(label="QClone Chat", type="messages")
# Textbox for user input.
msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
# State to hold conversation history.
state = gr.State([])
    # When the user submits text, stream the updated chat.
    def respond(user_message, history, choice):
        # A plain lambda returning chat(...) would hand Gradio a generator
        # object instead of a generator function, breaking streaming;
        # delegating with `yield from` preserves it.
        yield from chat(user_message, history, clean_choice(choice))

    msg.submit(
        fn=respond,
        inputs=[msg, state, model_dropdown],
        outputs=[chatbot, state],
        show_progress=True
    )
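# share=True asks Gradio for a temporary public gradio.live link; it only
# applies to local runs (hosted platforms such as Spaces ignore it).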
demo.launch(share=True)