import gradio as gr
import os
import requests
from typing import List, Dict

from session_manager import SessionManager  # only if you need sessions

# Initialize session manager and get HF API key (adjust if not using sessions)
session_manager = SessionManager()
HF_API_KEY = os.getenv("HF_API_KEY")
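# NOTE: the key is read from the environment; on a Hugging Face Space this is
# typically configured as a repository secret named HF_API_KEY. Without a
# valid token, the Inference API responds with 401 errors.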
# Model endpoints configuration
MODEL_ENDPOINTS = {
    "Qwen2.5-72B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
    "Llama3.3-70B-Instruct": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.3-70B-Instruct",
    "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
}
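# These are serverless Inference API URLs; a dedicated Inference Endpoint (or
# any TGI-compatible server) should work here as well, as long as it accepts
# the same text-generation payload.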

def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
    """
    Query a single model with the conversation so far (a list of dicts with
    'role' and 'content' keys).
    """
    endpoint = MODEL_ENDPOINTS[model_name]
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json"
    }

    # Combine the conversation into a single string (simple example)
    conversation = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
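    # This flattening discards each model's native chat structure; the
    # per-model prompts below re-wrap the transcript in the control tokens
    # each model family expects.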
    # Model-specific prompt formatting
    model_prompts = {
        "Qwen2.5-72B-Instruct": (
            f"<|im_start|>system\nCollaborate with other experts:\n{conversation}<|im_end|>\n"
            "<|im_start|>assistant\nMy analysis:"
        ),
        "Llama3.3-70B-Instruct": (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
            f"Build on the conversation:\n{conversation}<|eot_id|>\n"
            "<|start_header_id|>assistant<|end_header_id|>\nMy contribution:"
        ),
        "Qwen2.5-Coder-32B-Instruct": (
            f"<|im_start|>system\nTechnical discussion context:\n{conversation}<|im_end|>\n"
            "<|im_start|>assistant\nTechnical perspective:"
        )
    }
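    # Stop generation at each model's end-of-turn token so a reply does not
    # run on into a fabricated next turn.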
    stop_sequences = {
        "Qwen2.5-72B-Instruct": ["<|im_end|>", "<|endoftext|>"],
        "Llama3.3-70B-Instruct": ["<|eot_id|>", "\nuser:"],
        "Qwen2.5-Coder-32B-Instruct": ["<|im_end|>", "<|endoftext|>"]
    }
    payload = {
        "inputs": model_prompts[model_name],
        "parameters": {
            "max_new_tokens": 1024,  # the text-generation API expects max_new_tokens, not max_tokens
            "temperature": 0.7,
            "stop": stop_sequences[model_name],  # TGI-style parameter; older backends may ignore it
            "return_full_text": False
        }
    }
    try:
        response = requests.post(endpoint, json=payload, headers=headers, timeout=120)
        response.raise_for_status()
        generated = response.json()[0]["generated_text"]
        # Clean up possible leftover control tokens
        generated = generated.split("<|")[0].strip()
        return generated
    except Exception as e:
        return f"{model_name} error: {str(e)}"
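
# Quick smoke test (hypothetical; run locally with HF_API_KEY set):
#   print(query_model("Qwen2.5-72B-Instruct",
#                     [{"role": "user", "content": "Hello"}]))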

def on_new_session():
    """Create a new session and clear the chat."""
    new_id = session_manager.create_session()
    return new_id, []


def user_message(user_msg, history, session_id):
    """
    After the user hits enter, append the user's message to the conversation
    and return the updated conversation so the UI can display it.
    """
    if not user_msg.strip():
        return "", history  # ignore empty input
    # Append the new user message to the conversation
    history.append({"role": "user", "content": user_msg})
    return "", history

def bot_reply(history, session_id):
    """
    Stream the multi-model response. We rely on the *last* user message in
    `history`, call each model in turn, and yield the updated conversation
    after every partial update.
    """
    if not history or history[-1]["role"] != "user":
        return  # there is no new user message to respond to

    # Optionally load the existing session, if you have session logic
    session = session_manager.load_session(session_id) if session_id else None
    if session is None:
        session = {"history": []}
    # 1) Qwen2.5-Coder-32B: show a placeholder, then replace it with the answer
    history.append({"role": "assistant", "content": "🔵 Qwen2.5-Coder-32B-Instruct is thinking..."})
    yield history
    # Query with the placeholder excluded so it never leaks into the prompt
    resp1 = query_model("Qwen2.5-Coder-32B-Instruct", history[:-1])
    updated_content = f"🔵 **Qwen2.5-Coder-32B-Instruct**\n{resp1}"
    history[-1]["content"] = updated_content
    yield history

    # 2) Qwen2.5-72B: same pattern, building on the first answer
    history[-1]["content"] = updated_content + "\n\n🟣 Qwen2.5-72B-Instruct is thinking..."
    yield history
    history[-1]["content"] = updated_content  # drop the placeholder before querying
    resp2 = query_model("Qwen2.5-72B-Instruct", history)
    updated_content += f"\n\n🟣 **Qwen2.5-72B-Instruct**\n{resp2}"
    history[-1]["content"] = updated_content
    yield history

    # 3) Llama3.3-70B: final contributor
    history[-1]["content"] = updated_content + "\n\n🟡 Llama3.3-70B-Instruct is thinking..."
    yield history
    history[-1]["content"] = updated_content  # drop the placeholder before querying
    resp3 = query_model("Llama3.3-70B-Instruct", history)
    updated_content += f"\n\n🟡 **Llama3.3-70B-Instruct**\n{resp3}"
    history[-1]["content"] = updated_content
    yield history
    # Save the finished conversation back to the session, if needed
    session["history"] = history
    session_manager.save_session(session_id, session)
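
# SessionManager is a local helper module; create_session, load_session and
# save_session are assumed to round-trip plain dicts (the storage backend is
# an implementation detail of that module).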

def clear_chat():
    """Clear the Chatbot entirely by resetting it to an empty list."""
    return []

# Build the Gradio Blocks interface
with gr.Blocks() as demo:
    gr.Markdown("## Multi-LLM Collaboration Chat (Streaming)")

    with gr.Row():
        session_id = gr.State(session_manager.create_session)
        new_session_btn = gr.Button("🔄 New Session")

    # type="messages" makes the Chatbot accept the same list-of-dicts format
    # that user_message/bot_reply build; latex_delimiters enables math rendering
    chatbot = gr.Chatbot(
        type="messages",
        height=550,
        latex_delimiters=[
            {"left": "$", "right": "$", "display": False},   # inline math
            {"left": "$$", "right": "$$", "display": True}   # display math
        ]
    )
    msg = gr.Textbox(label="Your Message")
    clear_btn = gr.Button("Clear")
    # Wire up the events:
    # 1) On user submit, echo the message immediately, then stream the reply:
    msg.submit(
        fn=user_message,
        inputs=[msg, chatbot, session_id],
        outputs=[msg, chatbot],
        queue=False
    ).then(
        fn=bot_reply,
        inputs=[chatbot, session_id],
        outputs=[chatbot]
    )
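    # bot_reply is a generator, so it runs on Gradio's queue and each yielded
    # history is streamed to the Chatbot; user_message stays unqueued so the
    # user's text appears instantly.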
    # 2) On "Clear" click, empty the chat:
    clear_btn.click(fn=clear_chat, outputs=chatbot, queue=False)

    # 3) On "New Session" click, get a fresh session ID and clear the chat:
    new_session_btn.click(fn=on_new_session, outputs=[session_id, chatbot], queue=False)


if __name__ == "__main__":
    demo.launch()
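
# Assumed Space files alongside this app.py: a requirements.txt pinning
# gradio and requests, plus the session_manager module imported above.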