Spaces:

luminoussg
/

choupijiang

Sleeping

App Files Files Community

choupijiang / app.py

luminoussg

Update app.py

dcc233f verified 5 months ago

raw

history blame

6.07 kB

	import gradio as gr
	import os
	import threading
	from datetime import datetime
	from typing import List, Dict, Any, Generator
	from session_manager import SessionManager
	from huggingface_hub import InferenceClient

	# Shared session store, plus the API token read from the HF_API_KEY
	# environment variable (None when the variable is not set).
	session_manager = SessionManager()
	HF_API_KEY = os.environ.get("HF_API_KEY")

	# Display name -> hosted inference URL for each model the app can query.
	MODEL_ENDPOINTS = dict(
	    (
	        (
	            "Qwen2.5-72B-Instruct",
	            "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
	        ),
	        (
	            "Llama3.3-70B-Instruct",
	            "https://api-inference.huggingface.co/models/meta-llama/Llama-3.3-70B-Instruct",
	        ),
	        (
	            "Qwen2.5-Coder-32B-Instruct",
	            "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
	        ),
	    )
	)

	# Per-model prompt templates; "{conversation}" is replaced with the flattened
	# chat history. The special tokens are written without backslashes: "\|" is
	# not a valid Python escape, so a backslash there would be sent to the model
	# literally.
	_PROMPT_TEMPLATES = {
	    "Qwen2.5-72B-Instruct": (
	        "<|im_start|>system\nCollaborate with other experts. Previous discussion:\n{conversation}<|im_end|>\n"
	        "<|im_start|>assistant\nMy analysis:"
	    ),
	    "Llama3.3-70B-Instruct": (
	        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
	        "Build upon this discussion:\n{conversation}<|eot_id|>\n"
	        "<|start_header_id|>assistant<|end_header_id|>\nMy contribution:"
	    ),
	    "Qwen2.5-Coder-32B-Instruct": (
	        "<|im_start|>system\nTechnical discussion context:\n{conversation}<|im_end|>\n"
	        "<|im_start|>assistant\nTechnical perspective:"
	    ),
	}


	def query_model(model_name: str, messages: List[Dict[str, str]]) -> Generator[str, None, None]:
	    """Query a single model with the chat history and stream the response.

	    The history is flattened into "role: content" lines, embedded in the
	    model's prompt template and sent as one system message.

	    Args:
	        model_name: Key into MODEL_ENDPOINTS selecting the model to call.
	        messages: Chat history as dicts with "role" and "content" keys.

	    Yields:
	        Non-empty text fragments of the reply as they arrive. If the request
	        or the stream fails, a single "<model_name> error: <reason>" string
	        is yielded instead of raising.

	    Raises:
	        KeyError: If model_name is not present in MODEL_ENDPOINTS (raised on
	            first iteration, since this is a generator).
	    """
	    endpoint = MODEL_ENDPOINTS[model_name]

	    # Build full conversation history for context
	    conversation = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)

	    client = InferenceClient(base_url=endpoint, token=HF_API_KEY)

	    try:
	        # Only the selected model's prompt is built. The lookup stays inside
	        # the try so a missing template is reported like any other failure.
	        prompt = _PROMPT_TEMPLATES[model_name].format(conversation=conversation)

	        stream = client.chat.completions.create(
	            messages=[{"role": "system", "content": prompt}],
	            stream=True,
	            max_tokens=2048,
	            temperature=0.7,
	        )

	        for chunk in stream:
	            # A chunk may carry no choices; skip it instead of failing on
	            # choices[0] and aborting the whole reply.
	            if not chunk.choices:
	                continue
	            content = chunk.choices[0].delta.content
	            if content:
	                yield content

	    except Exception as e:
	        # Deliberate catch-all: the caller streams this text straight to the
	        # chat window, so failures are surfaced as a message, not a crash.
	        yield f"{model_name} error: {e}"

	def respond(message: str, history: List[List[str]], session_id: str) -> Generator[str, None, None]:
	    """Handle sequential model responses with context preservation and streaming.

	    Each model is queried in turn and sees the stored conversation plus the
	    replies of the models that answered before it in this round.

	    Args:
	        message: The user's new message.
	        history: Chat history as passed by the UI. Unused here; the context
	            is rebuilt from the stored session instead.
	        session_id: Identifier of the session to load and save.

	    Yields:
	        Text for the chat window: a "thinking" notice per model, then that
	        model's reply as it grows, and finally all replies combined.
	    """
	    # Load or initialize session
	    session = session_manager.load_session(session_id)
	    if not isinstance(session, dict) or "history" not in session:
	        session = {"history": []}
	    saved = session["history"]

	    # Build context from session history
	    messages = [
	        {"role": "user", "content": entry["content"]}
	        if entry["type"] == "user"
	        else {"role": "assistant", "content": f"{entry['model']}: {entry['content']}"}
	        for entry in saved
	    ]

	    # The caller may already have stored this message in the session. Adding
	    # it again would duplicate the user turn in both the model context and
	    # the saved history, so append only when it is not the latest entry.
	    already_recorded = (
	        bool(saved)
	        and saved[-1]["type"] == "user"
	        and saved[-1]["content"] == message
	    )
	    if not already_recorded:
	        messages.append({"role": "user", "content": message})
	        saved.append({
	            "timestamp": datetime.now().isoformat(),
	            "type": "user",
	            "content": message
	        })

	    # Models answer in this fixed order, each tagged with its own marker.
	    model_names = ["Qwen2.5-Coder-32B-Instruct", "Qwen2.5-72B-Instruct", "Llama3.3-70B-Instruct"]
	    model_colors = ["🔵", "🟣", "🟡"]
	    responses = dict.fromkeys(model_names, "")

	    # Stream responses from each model
	    for color, model_name in zip(model_colors, model_names):
	        yield f"{color} {model_name} is thinking..."

	        full_response = ""
	        for chunk in query_model(model_name, messages):
	            full_response += chunk
	            yield f"{color} {model_name}\n{full_response}"

	        # Record the finished reply so the next model sees it as context.
	        saved.append({
	            "timestamp": datetime.now().isoformat(),
	            "type": "assistant",
	            "model": model_name,
	            "content": full_response
	        })
	        messages.append({"role": "assistant", "content": f"{model_name}: {full_response}"})
	        responses[model_name] = full_response

	    # Save final session state
	    session_manager.save_session(session_id, session)

	    # Last update: every model's reply in one message.
	    yield "".join(
	        f"{color} {model_name}\n{responses[model_name]}\n\n"
	        for color, model_name in zip(model_colors, model_names)
	    )

	# Create the Gradio interface
	with gr.Blocks() as demo:
	    gr.Markdown("## Multi-LLM Collaboration Chat")

	    with gr.Row():
	        # The callable is passed uncalled so Gradio evaluates it when the app
	        # loads rather than once at import time.
	        session_id = gr.State(session_manager.create_session)
	        new_session = gr.Button("🔄 New Session")

	    chatbot = gr.Chatbot(height=600)
	    msg = gr.Textbox(label="Message")
	    save_history = gr.Checkbox(label="Save Conversation History", value=True)

	    def on_new_session():
	        """Create a fresh session and clear the chat window."""
	        new_id = session_manager.create_session()
	        return new_id, []

	    def user(message, history, session_id, save_history):
	        """Store the submitted message (if requested) and queue it for the bot.

	        Returns an empty string to clear the textbox, and the chat history
	        extended with a [message, None] pair awaiting the bot's reply.
	        """
	        if save_history:
	            session = session_manager.load_session(session_id)
	            # Same guard as respond(): a missing or malformed session must
	            # not crash the submit handler.
	            if not isinstance(session, dict) or "history" not in session:
	                session = {"history": []}
	            session["history"].append({
	                "timestamp": datetime.now().isoformat(),
	                "type": "user",
	                "content": message
	            })
	            session_manager.save_session(session_id, session)
	        # NOTE(review): respond() loads and saves the session on its own,
	        # independently of this checkbox.
	        return "", history + [[message, None]]

	    def bot(history, session_id):
	        """Stream the models' replies into the pending last chat entry."""
	        if history and history[-1][1] is None:
	            message = history[-1][0]
	            for response in respond(message, history[:-1], session_id):
	                # Each update replaces the pending entry's reply text.
	                history[-1][1] = response
	                yield history

	    msg.submit(user, [msg, chatbot, session_id, save_history], [msg, chatbot]).then(
	        bot, [chatbot, session_id], [chatbot]
	    )
	    new_session.click(on_new_session, None, [session_id, chatbot])

	if __name__ == "__main__":
	    demo.launch(share=True)