File size: 5,316 Bytes
7841db2
0a8cafa
4fc9e70
297f353
7841db2
 
4fc9e70
7841db2
 
10dfcb1
 
 
7660535
 
 
 
10dfcb1
 
 
 
7841db2
 
602d4aa
7841db2
 
 
 
 
 
 
 
 
 
 
0a8cafa
 
7841db2
10dfcb1
 
 
 
 
7841db2
 
 
10dfcb1
 
7841db2
0a8cafa
 
ae21d92
4fc9e70
7841db2
f3d87e2
 
10dfcb1
 
 
 
 
f3d87e2
10dfcb1
ae21d92
10dfcb1
 
 
 
 
 
 
 
 
 
 
 
 
 
ae21d92
10dfcb1
 
 
 
7660535
10dfcb1
 
ae21d92
 
 
 
10dfcb1
ae21d92
 
 
 
 
10dfcb1
 
ae21d92
 
10dfcb1
ae21d92
 
 
10dfcb1
 
 
 
 
 
 
 
 
 
ae21d92
10dfcb1
 
ae21d92
 
 
 
 
 
 
10dfcb1
 
1efa837
10dfcb1
 
 
ae21d92
10dfcb1
 
 
bc4762e
ae21d92
10dfcb1
 
ae21d92
10dfcb1
ae21d92
10dfcb1
 
 
 
f3d87e2
0a8cafa
ae21d92
0a8cafa
7841db2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
import gradio as gr
from huggingface_hub import login, InferenceClient
import spaces

# Authenticate with Hugging Face API
# The token comes from the TOKEN environment variable. Only log in when it is
# actually set: calling login() with None falls back to an interactive prompt,
# which cannot be answered when the app runs headless.
api_key = os.getenv("TOKEN")
if api_key:
    login(api_key)

# Predefined list of models to compare (can be expanded)
# Maps the display name shown in the UI to the Hugging Face Hub repo id.
model_options = {
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Qwen-2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B",
    "DeepSeek-V2.5": "deepseek-ai/DeepSeek-V2.5",
    "Athene-V2-Chat": "Nexusflow/Athene-V2-Chat",
}

# Initialize clients for models
# One InferenceClient per display name, keyed the same way as model_options.
clients = {name: InferenceClient(repo_id) for name, repo_id in model_options.items()}

# Define the response function
@spaces.GPU
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_models,
):
    """Send the same conversation to every selected model and collect the replies.

    Args:
        message: The new user message appended to the end of the conversation.
        history: Earlier chat messages as role/content dicts; ``None`` or an
            empty list means there is no prior conversation.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.
        selected_models: Iterable of keys into the module-level ``clients``
            mapping; ``None`` is treated as no selection.

    Returns:
        A dict mapping each selected model name to its complete response text.

    Raises:
        KeyError: If a selected model name has no entry in ``clients``.
    """
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history or [])
    messages.append({"role": "user", "content": message})

    responses = {}

    # Generate responses for each selected model
    for model_name in selected_models or []:
        client = clients[model_name]
        pieces = []
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Defensive: skip chunks that carry no choices at all.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            # Streamed chunks may carry no text (content is None); concatenating
            # None onto a str would raise TypeError, so skip them.
            if delta:
                pieces.append(delta)
        responses[model_name] = "".join(pieces)

    return responses


# Build Gradio app
def create_demo():
    """Build the Gradio Blocks UI for comparing model responses side by side.

    The layout has the system/user message inputs, the sampling sliders, a
    checkbox group for choosing models, a submit button, and one response
    column (response text, vote button, vote counter) per entry in
    ``model_options``.

    All response columns are created while the Blocks context is open, because
    event listeners can only target components that already exist when the
    listener is registered. Columns for unselected models are hidden rather
    than created or destroyed on the fly.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    model_names = list(model_options)
    default_models = ["Llama-3.1-70B", "Qwen-2.5-1.5B-Instruct"]

    with gr.Blocks() as demo:
        gr.Markdown("# AI Model Comparison Tool 🌟")
        gr.Markdown(
            """
            Compare responses from multiple AI models side-by-side.  
            Select models, ask a question, and vote for the best response!
            """
        )

        # Input Section
        with gr.Row():
            system_message = gr.Textbox(
                value="You are a helpful assistant providing answers for technical and customer support queries.",
                label="System message",
            )
            user_message = gr.Textbox(label="Your question", placeholder="Type your question here...")

        with gr.Row():
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
            )

        # Model Selection Section
        with gr.Row():
            selected_models = gr.CheckboxGroup(
                choices=model_names,
                label="Select models to compare",
                value=default_models,  # Default models
            )

        # Add a button for generating responses
        submit_button = gr.Button("Generate Responses")
        vote_state = gr.State([0] * len(model_names))  # Initialize votes for all models
        history_state = gr.State([])  # Conversation history passed to respond()

        # Response Section: one column per known model, in model_names order.
        # Only the columns of the default selection start out visible.
        response_columns = []
        response_boxes = []
        vote_buttons = []
        vote_counts = []
        with gr.Row():
            for model_name in model_names:
                with gr.Column(visible=model_name in default_models) as response_column:
                    response_box = gr.Textbox(label=f"Response from {model_name}", interactive=False)
                    vote_button = gr.Button(f"Vote for {model_name}")
                    vote_count = gr.Number(value=0, label=f"Votes for {model_name}")
                response_columns.append(response_column)
                response_boxes.append(response_box)
                vote_buttons.append(vote_button)
                vote_counts.append(vote_count)

        # Generate responses
        def generate_responses(
            message, history, system_message, max_tokens, temperature, top_p, selected_models
        ):
            """Return one response string per response box, in model_names order."""
            responses = respond(
                message, history, system_message, max_tokens, temperature, top_p, selected_models
            )
            # Models that were not selected get an empty string so their box is cleared.
            return [responses.get(name, "") for name in model_names]

        # Handle votes
        def handle_votes(votes, model_name):
            """Increment the tally for model_name; return (all votes, that model's count)."""
            index = model_names.index(model_name)
            updated = list(votes)  # copy so the stored state is replaced, not mutated
            updated[index] += 1
            return updated, updated[index]

        # Show only the columns of the currently selected models
        def update_visible_columns(models):
            """Return one visibility update per response column, in model_names order."""
            chosen = set(models or [])
            return [gr.update(visible=name in chosen) for name in model_names]

        # Link button click to generate responses
        submit_button.click(
            generate_responses,
            inputs=[
                user_message,
                history_state,
                system_message,
                max_tokens,
                temperature,
                top_p,
                selected_models,
            ],
            outputs=response_boxes,
        )

        # Link voting buttons to handle votes
        for model_name, vote_button, vote_count in zip(model_names, vote_buttons, vote_counts):
            vote_button.click(
                # Bind the name as a default argument to avoid late-binding closures.
                lambda votes, name=model_name: handle_votes(votes, name),
                inputs=[vote_state],
                outputs=[vote_state, vote_count],
            )

        # Update response boxes when models are selected
        selected_models.change(
            update_visible_columns,
            inputs=[selected_models],
            outputs=response_columns,
        )

    return demo


# Script entry point: build the interface and start the Gradio server.
if __name__ == "__main__":
    # `demo` is still bound at module level, exactly as before.
    (demo := create_demo()).launch()