Spaces:

luminoussg
/

choupijiang

Sleeping

File size: 6,607 Bytes

43b5bef
c1e5d4c
518be16
c1e5d4c
0d6849e
cc3006a
 
43b5bef
cc3006a
 
cf38aa5
43b5bef
c1e5d4c
43b5bef
 
 
 
 
 
c1e5d4c
 
 
518be16
 
c1e5d4c
518be16
77ac272
6733659
 
 
 
c9870b1
 
6733659
 
c9870b1
 
6733659
 
 
c9870b1
 
6733659
4766698
c9870b1
 
 
 
 
 
 
 
 
 
c1e5d4c
c9870b1
6617dfe
4766698
c1e5d4c
c9870b1
 
6617dfe
518be16
77ac272
c1e5d4c
 
 
c9870b1
 
 
 
 
d82511d
c1e5d4c
c9870b1
c1e5d4c
89c6fc8
28f1fca
57a76f2
0d6849e
57a76f2
 
73c4292
28f1fca
 
 
 
 
 
 
 
 
 
0d6849e
 
 
 
 
28f1fca
89c6fc8
 
0fac2da
327109c
cc3006a
 
 
 
 
 
28f1fca
89c6fc8
f32ce56
0fac2da
327109c
cc3006a
 
 
 
 
 
28f1fca
89c6fc8
f32ce56
0fac2da
327109c
cc3006a
 
 
 
 
 
28f1fca
89c6fc8
28f1fca
 
cc3006a
28f1fca
89c6fc8
 
0fac2da
 
 
 
 
 
 
 
 
43b5bef
57a76f2
0fac2da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89c6fc8
0fac2da
 
 
 
 
 
 
 
 
43b5bef
c1e5d4c
cc3006a

import gradio as gr
import os
import requests
import threading
from datetime import datetime
from typing import List, Dict, Any, Generator
from session_manager import SessionManager

# Module-wide SessionManager instance (used below to create, load and save
# sessions) and the Hugging Face API token taken from the environment.
# HF_API_KEY is None when the variable is not set.
session_manager = SessionManager()
HF_API_KEY = os.environ.get("HF_API_KEY")

# Inference endpoint URL for every supported model, keyed by the display name
# that the rest of this module uses to refer to the model.
_HF_INFERENCE_ROOT = "https://api-inference.huggingface.co/models"
MODEL_ENDPOINTS = {
    display_name: f"{_HF_INFERENCE_ROOT}/{repo_id}"
    for display_name, repo_id in (
        ("Qwen2.5-72B-Instruct", "Qwen/Qwen2.5-72B-Instruct"),
        ("Llama3.3-70B-Instruct", "meta-llama/Llama-3.3-70B-Instruct"),
        ("Qwen2.5-Coder-32B-Instruct", "Qwen/Qwen2.5-Coder-32B-Instruct"),
    )
}

def query_model(
    model_name: str,
    messages: List[Dict[str, str]],
    timeout: float = 120.0,
) -> str:
    """Query a single model with the chat history.

    The conversation is flattened into ``role: content`` lines, embedded in a
    model-specific prompt template, and POSTed to the model's endpoint from
    ``MODEL_ENDPOINTS``.

    Args:
        model_name: Key of ``MODEL_ENDPOINTS``; also selects the prompt
            template and the stop sequences.
        messages: Conversation so far. Every item must provide ``"role"`` and
            ``"content"``.
        timeout: Seconds handed to ``requests`` as the request timeout, so a
            stalled endpoint cannot block the caller indefinitely.

    Returns:
        The cleaned generated text, or ``"<model_name> error: <details>"`` when
        the HTTP request or the parsing of its response fails.

    Raises:
        KeyError: If ``model_name`` is not a known model, or a message lacks
            ``"role"`` / ``"content"`` (both are looked up before the guarded
            request section).
    """
    endpoint = MODEL_ENDPOINTS[model_name]

    headers = {"Content-Type": "application/json"}
    # Only attach credentials when a key is configured; otherwise the header
    # would literally read "Bearer None".
    if HF_API_KEY:
        headers["Authorization"] = f"Bearer {HF_API_KEY}"

    # Build full conversation history for context
    conversation = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)

    # Model-specific prompt formatting with full history
    model_prompts = {
        "Qwen2.5-72B-Instruct": (
            f"<|im_start|>system\nCollaborate with other experts. Previous discussion:\n{conversation}<|im_end|>\n"
            "<|im_start|>assistant\nMy analysis:"
        ),
        "Llama3.3-70B-Instruct": (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
            f"Build upon this discussion:\n{conversation}<|eot_id|>\n"
            "<|start_header_id|>assistant<|end_header_id|>\nMy contribution:"
        ),
        "Qwen2.5-Coder-32B-Instruct": (
            f"<|im_start|>system\nTechnical discussion context:\n{conversation}<|im_end|>\n"
            "<|im_start|>assistant\nTechnical perspective:"
        ),
    }

    # Model-specific stop sequences
    stop_sequences = {
        "Qwen2.5-72B-Instruct": ["<|im_end|>", "<|endoftext|>"],
        "Llama3.3-70B-Instruct": ["<|eot_id|>", "\nuser:"],
        "Qwen2.5-Coder-32B-Instruct": ["<|im_end|>", "<|endoftext|>"],
    }

    payload = {
        "inputs": model_prompts[model_name],
        "parameters": {
            # The text-generation task names its length limit "max_new_tokens";
            # "max_tokens" belongs to the chat-completions API.
            "max_new_tokens": 2048,
            "temperature": 0.7,
            # NOTE(review): the text-generation docs appear to call this
            # parameter "stop" - confirm that "stop_sequences" is honoured.
            "stop_sequences": stop_sequences[model_name],
            "return_full_text": False,
        },
    }

    try:
        response = requests.post(
            endpoint, json=payload, headers=headers, timeout=timeout
        )
        response.raise_for_status()
        result = response.json()[0]["generated_text"]
    except Exception as e:
        # Deliberate best-effort boundary: any transport, HTTP or
        # response-shape failure becomes a readable message in the chat
        # instead of aborting the whole multi-model turn.
        return f"{model_name} error: {str(e)}"

    # Clean up response formatting
    result = result.split("<|")[0]  # Drop everything from the first special token on
    result = result.replace("**", "").replace("##", "")  # Remove markdown markers
    return result.strip()

def respond(message: str, history: List[List[str]], session_id: str) -> str:
    """Run one chat turn through all three models, one after another.

    The stored session history is replayed as context, the new user message is
    appended, and each model in turn sees everything said before it
    (including the answers of the models that ran earlier in this turn).
    Every message is recorded in the session, which is saved once at the end.

    Args:
        message: The user's new message.
        history: Chat history supplied by the UI; not used here because the
            context is rebuilt from the stored session instead.
        session_id: Identifier passed to the session manager for loading and
            saving the session.

    Returns:
        The three model answers, each under its own heading, separated by
        blank lines.
    """
    # Load the session, falling back to a fresh one when nothing usable is stored.
    session = session_manager.load_session(session_id)
    usable = isinstance(session, dict) and "history" in session
    if not usable:
        session = {"history": []}
    log = session["history"]

    # Replay stored entries as chat messages; assistant entries are prefixed
    # with the name of the model that produced them.
    messages = [
        {"role": "user", "content": entry["content"]}
        if entry["type"] == "user"
        else {"role": "assistant", "content": f"{entry['model']}: {entry['content']}"}
        for entry in log
    ]

    # Record the incoming user message in both the context and the session.
    messages.append({"role": "user", "content": message})
    log.append({
        "timestamp": datetime.now().isoformat(),
        "type": "user",
        "content": message,
    })

    # Models in the order they are consulted, with the marker shown in the UI.
    pipeline = (
        ("🔵", "Qwen2.5-Coder-32B-Instruct"),
        ("🟣", "Qwen2.5-72B-Instruct"),
        ("🟡", "Llama3.3-70B-Instruct"),
    )

    sections = []
    for badge, model in pipeline:
        reply = query_model(model, messages)
        log.append({
            "timestamp": datetime.now().isoformat(),
            "type": "assistant",
            "model": model,
            "content": reply,
        })
        # Make this answer visible to the models that run after it.
        messages.append({"role": "assistant", "content": f"{model}: {reply}"})
        sections.append(f"{badge} **{model}**\n{reply}")

    # Persist the session once the whole turn is complete.
    session_manager.save_session(session_id, session)

    return "\n\n".join(sections)

# Stylesheet handed to the chat interface: one rule per entry, joined with
# newlines and wrapped in a leading and trailing newline.
_CSS_RULES = (
    ".message { padding: 15px; margin: 10px 0; border-radius: 10px; }",
    ".assistant { background: #f8fafc; border-left: 4px solid #3b82f6; }",
    ".user { background: #eff6ff; border-left: 4px solid #60a5fa; }",
    ".model-name { font-weight: 600; color: #1e40af; margin-bottom: 8px; }",
    ".thinking { color: #6b7280; font-style: italic; }",
)
css = "\n" + "\n".join(_CSS_RULES) + "\n"

# Assemble the Gradio chat UI. The components are built in the order
# state -> chatbot -> theme and then handed to the ChatInterface.

# The create_session callable itself (not its result) is the state's value;
# presumably Gradio invokes it to produce the initial value - confirm against
# the gr.State documentation.
session_state = gr.State(session_manager.create_session)

# NOTE(review): sanitize_html=False combined with render_markdown=True means
# model output is displayed without HTML sanitising - confirm this is intended.
chat_window = gr.Chatbot(
    height=600,
    show_label=False,
    bubble_full_width=False,
    show_copy_button=True,
    container=True,
    sanitize_html=False,
    render_markdown=True
)

soft_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=("Inter", "sans-serif"),
)

demo = gr.ChatInterface(
    fn=respond,
    title="Multi-LLM Collaboration Chat",
    description="Experience collaborative AI thinking with three powerful language models",
    examples=[
        ["Explain how quantum computing works"],
        ["Write a Python function to find prime numbers"],
    ],
    additional_inputs=[session_state],
    chatbot=chat_window,
    theme=soft_theme,
    css=css,
)

# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    # share=True is forwarded to Gradio's launch(); per the Gradio docs it
    # requests a public share link - confirm this is wanted for the deployment.
    demo.launch(share=True)