Spaces:

bazingapaa
/

compare-models

Running on Zero

File size: 5,316 Bytes

7841db2
0a8cafa
4fc9e70
297f353
7841db2
 
4fc9e70
7841db2
 
10dfcb1
 
 
7660535
 
 
 
10dfcb1
 
 
 
7841db2
 
602d4aa
7841db2
 
 
 
 
 
 
 
 
 
 
0a8cafa
 
7841db2
10dfcb1
 
 
 
 
7841db2
 
 
10dfcb1
 
7841db2
0a8cafa
 
ae21d92
4fc9e70
7841db2
f3d87e2
 
10dfcb1
 
 
 
 
f3d87e2
10dfcb1
ae21d92
10dfcb1
 
 
 
 
 
 
 
 
 
 
 
 
 
ae21d92
10dfcb1
 
 
 
7660535
10dfcb1
 
ae21d92
 
 
 
10dfcb1
ae21d92
 
 
 
 
10dfcb1
 
ae21d92
 
10dfcb1
ae21d92
 
 
10dfcb1
 
 
 
 
 
 
 
 
 
ae21d92
10dfcb1
 
ae21d92
 
 
 
 
 
 
10dfcb1
 
1efa837
10dfcb1
 
 
ae21d92
10dfcb1
 
 
bc4762e
ae21d92
10dfcb1
 
ae21d92
10dfcb1
ae21d92
10dfcb1
 
 
 
f3d87e2
0a8cafa
ae21d92
0a8cafa
7841db2

import os
import gradio as gr
from huggingface_hub import login, InferenceClient
import spaces

# Authenticate with Hugging Face API.
# The token is read from the TOKEN environment variable (a Space secret).
api_key = os.getenv("TOKEN")
if api_key:
    login(api_key)
# When TOKEN is unset we deliberately skip login(): calling login(None) falls
# back to an interactive prompt, which cannot be answered in a headless Space.
# Requests are then sent with whatever credentials huggingface_hub finds
# locally (if any); gated models will reject them at call time.

# Predefined list of models to compare (can be expanded).
# Maps the display name shown in the UI to the Hub repo id used for inference.
model_options = {
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Qwen-2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B",
    "DeepSeek-V2.5": "deepseek-ai/DeepSeek-V2.5",
    "Athene-V2-Chat": "Nexusflow/Athene-V2-Chat",
}

# Initialize one inference client per model, keyed by display name.
# The token is passed explicitly so the clients do not depend on the side
# effect of login(); token=None keeps the library's default lookup.
clients = {
    name: InferenceClient(repo_id, token=api_key)
    for name, repo_id in model_options.items()
}

# Define the response function
@spaces.GPU
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_models,
):
    """Query every selected model with the same conversation.

    Args:
        message: The new user message to send.
        history: Earlier chat turns as ``{"role": ..., "content": ...}``
            dicts; ``None`` or an empty list means no prior turns.
        system_message: Content of the leading system message.
        max_tokens: Upper bound on generated tokens, forwarded to the client.
        temperature: Sampling temperature, forwarded to the client.
        top_p: Nucleus-sampling threshold, forwarded to the client.
        selected_models: Display names of the models to query; each must be
            a key of the module-level ``clients`` mapping.

    Returns:
        A dict mapping each selected model name to its full response text.

    Raises:
        KeyError: If a name in ``selected_models`` has no client.
    """
    messages = [
        {"role": "system", "content": system_message},
        *(history or []),
        {"role": "user", "content": message},
    ]

    responses = {}

    # Generate responses for each selected model
    for model_name in selected_models or []:
        client = clients[model_name]
        pieces = []
        for chunk in client.chat_completion(
            messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p
        ):
            # Streamed chunks may carry no choices, or a delta whose content
            # is None (e.g. the final chunk); concatenating that would raise
            # TypeError, so such chunks are skipped.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta:
                pieces.append(delta)
        responses[model_name] = "".join(pieces)

    return responses


# Build Gradio app
def create_demo():
    """Build the side-by-side model comparison UI.

    One response column (response textbox, vote button, vote counter) is
    created for every entry of ``model_options`` while the Blocks layout is
    being built, because Gradio only renders components created at build
    time; event handlers can update existing components but not add new
    ones. Columns for models that are not currently selected are hidden
    rather than removed.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    model_names = list(model_options)
    # Keep only defaults that actually exist so the checkbox group stays valid
    # if model_options is edited.
    default_models = [
        name for name in ("Llama-3.1-70B", "Qwen-2.5-1.5B-Instruct") if name in model_options
    ]

    with gr.Blocks() as demo:
        gr.Markdown("# AI Model Comparison Tool 🌟")
        gr.Markdown(
            """
            Compare responses from multiple AI models side-by-side.  
            Select models, ask a question, and vote for the best response!
            """
        )

        # Input Section
        with gr.Row():
            system_message = gr.Textbox(
                value="You are a helpful assistant providing answers for technical and customer support queries.",
                label="System message"
            )
            user_message = gr.Textbox(label="Your question", placeholder="Type your question here...")

        with gr.Row():
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
            )

        # Model Selection Section
        with gr.Row():
            selected_models = gr.CheckboxGroup(
                choices=model_names,
                label="Select models to compare",
                value=default_models,  # Default models
            )

        # Add a button for generating responses
        submit_button = gr.Button("Generate Responses")

        # Response Section: parallel lists, all ordered like model_names.
        response_columns = []
        response_boxes = []
        vote_buttons = []
        vote_counts = []
        with gr.Row():
            for model_name in model_names:
                with gr.Column(visible=model_name in default_models) as response_column:
                    response_box = gr.Textbox(label=f"Response from {model_name}", interactive=False)
                    vote_button = gr.Button(f"Vote for {model_name}")
                    vote_count = gr.Number(value=0, label=f"Votes for {model_name}")
                response_columns.append(response_column)
                response_boxes.append(response_box)
                vote_buttons.append(vote_button)
                vote_counts.append(vote_count)

        vote_state = gr.State([0] * len(model_names))  # Initialize votes for all models
        # The conversation history is never extended by this UI, so every
        # request is sent as a fresh single-turn conversation.
        history_state = gr.State([])

        # Generate responses
        def generate_responses(
            message, history, system_message, max_tokens, temperature, top_p, selected_models
        ):
            """Return one response string per model, in ``model_names`` order."""
            responses = respond(
                message, history, system_message, max_tokens, temperature, top_p, selected_models
            )
            # Unselected models get an empty string so stale text is cleared.
            return [responses.get(name, "") for name in model_names]

        # Handle votes
        def make_vote_handler(index):
            """Create the click handler that adds one vote for model ``index``."""

            def vote(votes):
                # Work on a copy so the previous state value is not mutated.
                updated = list(votes)
                updated[index] += 1
                # Two outputs: the new vote state and this model's counter.
                return updated, updated[index]

            return vote

        # Show only the columns of the currently selected models
        def update_visible_columns(models):
            """Return one visibility update per response column."""
            chosen = set(models or [])
            return [gr.update(visible=name in chosen) for name in model_names]

        # Link button click to generate responses
        submit_button.click(
            generate_responses,
            inputs=[user_message, history_state, system_message, max_tokens, temperature, top_p, selected_models],
            outputs=response_boxes,
        )

        # Link voting buttons to handle votes
        for index, (vote_button, vote_count) in enumerate(zip(vote_buttons, vote_counts)):
            vote_button.click(
                make_vote_handler(index),
                inputs=[vote_state],
                outputs=[vote_state, vote_count],
            )

        # Update response boxes when models are selected
        selected_models.change(
            update_visible_columns,
            inputs=[selected_models],
            outputs=response_columns,
        )

    return demo


# Script entry point: build the UI and start the Gradio server only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    demo = create_demo()
    demo.launch()