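"""Gradio Space: a multi-LLM group chat that queries Qwen2.5-72B, Llama3.3-70B, and
Qwen2.5-Coder-32B in parallel via the Hugging Face Inference API and shows all replies."""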
import gradio as gr
import os
import requests
import threading
from typing import List, Dict, Any
# Get the Hugging Face API key from Spaces secrets
HF_API_KEY = os.getenv("HF_API_KEY")
# Model endpoints configuration
MODEL_ENDPOINTS = {
    "Qwen2.5-72B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
    "Llama3.3-70B-Instruct": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.3-70B-Instruct",
    "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
}

def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
    """Query a single model with the latest user message."""
    endpoint = MODEL_ENDPOINTS[model_name]
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json"
    }

    # Model-specific prompt formatting (ChatML for the Qwen models, the Llama 3 chat template for Llama)
    model_prompts = {
        "Qwen2.5-72B-Instruct": (
            f"<|im_start|>user\n{messages[-1]['content']}<|im_end|>\n<|im_start|>assistant\n"
        ),
        "Llama3.3-70B-Instruct": (
            "<|begin_of_text|>"
            "<|start_header_id|>user<|end_header_id|>\n\n"
            f"{messages[-1]['content']}<|eot_id|>"
            "<|start_header_id|>assistant<|end_header_id|>\n\n"
        ),
        "Qwen2.5-Coder-32B-Instruct": (
            f"<|im_start|>user\n{messages[-1]['content']}<|im_end|>\n<|im_start|>assistant\n"
        )
    }

    # Model-specific stop sequences
    stop_sequences = {
        "Qwen2.5-72B-Instruct": ["<|im_end|>", "<|endoftext|>"],
        "Llama3.3-70B-Instruct": ["<|eot_id|>", "\nuser:"],
        "Qwen2.5-Coder-32B-Instruct": ["<|im_end|>", "<|endoftext|>"]
    }

    payload = {
        "inputs": model_prompts[model_name],
        "parameters": {
            "max_new_tokens": 1024,  # the text-generation parameter is max_new_tokens, not max_tokens
            "temperature": 0.7,
            "stop_sequences": stop_sequences[model_name],
            "return_full_text": False
        }
    }

    try:
        response = requests.post(endpoint, json=payload, headers=headers, timeout=120)  # avoid hanging on a slow endpoint
        response.raise_for_status()
        result = response.json()[0]['generated_text']

        # Clean up response formatting
        result = result.split('<|')[0]  # Remove any remaining special tokens
        result = result.replace('**', '').replace('##', '')  # Remove markdown emphasis/headers
        result = result.strip()  # Remove leading/trailing whitespace
        return result.split('\n\n')[0]  # Return only the first paragraph
    except Exception as e:
        return f"{model_name} error: {str(e)}"

def respond(message: str, history: List[List[str]]) -> str:
    """Handle chat responses from all models"""
    # Prepare messages in OpenAI format
    messages = [{"role": "user", "content": message}]

    # Create threads for concurrent model queries
    threads = []
    results = {}

    def get_model_response(model_name):
        results[model_name] = query_model(model_name, messages)

    for model_name in MODEL_ENDPOINTS:
        thread = threading.Thread(target=get_model_response, args=(model_name,))
        thread.start()
        threads.append(thread)

    # Wait for all threads to complete
    for thread in threads:
        thread.join()

    # Format responses from all models with clear separation
    responses = []
    for model_name, response in results.items():
        responses.append(f"**{model_name}**:\n{response}")
    return "\n\n----------------------------------------\n\n".join(responses)

# Create the Gradio interface
chat_interface = gr.ChatInterface(
    respond,
    title="Multi-LLM Collaboration Chat",
    description="A group chat with Qwen2.5-72B, Llama3.3-70B, and Qwen2.5-Coder-32B",
    examples=["How can I optimize Python code?", "Explain quantum computing basics"],
    theme="soft"
)

if __name__ == "__main__":
    chat_interface.launch(share=True)