import gradio as gr
import os
import requests
import threading
from typing import List, Dict

# Get the Hugging Face API key from Spaces secrets
HF_API_KEY = os.getenv("HF_API_KEY")

# Model endpoints configuration
MODEL_ENDPOINTS = {
    "Qwen2.5-72B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
    "Llama3.3-70B-Instruct": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.3-70B-Instruct",
    "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
}
def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
    """Query a single model on the HF Inference API with the chat history."""
    endpoint = MODEL_ENDPOINTS[model_name]
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json"
    }

    # Flatten the chat history into a plain "role: content" prompt and cue the assistant turn
    prompt = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)
    prompt += "\nassistant:"

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 1024,     # the Inference API expects max_new_tokens, not max_tokens
            "temperature": 0.7,
            "return_full_text": False,  # return only the completion, not the echoed prompt
            "stop": ["\nuser:", "\nassistant:", "###"]  # TGI-style stop sequences, matching the prompt's role labels
        }
    }

    try:
        response = requests.post(endpoint, json=payload, headers=headers, timeout=120)
        response.raise_for_status()
        return response.json()[0]["generated_text"].strip()
    except Exception as e:
        return f"Error from {model_name}: {e}"
def respond(message: str, history: List[List[str]]) -> str:
    """Query all models concurrently and combine their answers into one reply."""
    # Rebuild the conversation in role/content form; history arrives as
    # [user, assistant] pairs (Gradio's default tuple format)
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Query the models concurrently, one thread per model
    threads = []
    results = {}

    def get_model_response(model_name):
        results[model_name] = query_model(model_name, messages)

    for model_name in MODEL_ENDPOINTS:
        thread = threading.Thread(target=get_model_response, args=(model_name,))
        thread.start()
        threads.append(thread)

    # Wait for all threads to complete
    for thread in threads:
        thread.join()

    # Label each model's reply and return them as a single Markdown message
    responses = []
    for model_name in MODEL_ENDPOINTS:
        responses.append(f"**{model_name}**:\n{results[model_name]}")
    return "\n\n".join(responses)
# Create the Gradio interface
chat_interface = gr.ChatInterface(
    respond,
    title="Multi-LLM Collaboration Chat",
    description="A group chat with Qwen2.5-72B, Llama3.3-70B, and Qwen2.5-Coder-32B",
    examples=["How can I optimize Python code?", "Explain quantum computing basics"],
    theme="soft",
)

if __name__ == "__main__":
    # share=True is not needed (and is ignored) when running on Spaces
    chat_interface.launch()