import os
import gradio as gr
from huggingface_hub import login, InferenceClient
import spaces

# Authenticate with the Hugging Face Hub (expects an access token in the TOKEN env var)
api_key = os.getenv("TOKEN")
if not api_key:
    raise RuntimeError("Set the TOKEN environment variable to a Hugging Face access token.")
login(api_key)
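# Note (assumption): any token with inference permissions should work; set it with
#   export TOKEN=hf_xxx
# before launching the app.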

# Predefined list of models to compare (can be expanded)
model_options = {
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "GPT-4": "TheBloke/Open_Gpt4_8x7B-GGUF",
    "Falcon-40B": "tiiuae/falcon-40b-instruct",
    "Mistral-7B": "mistralai/Mistral-7B-Instruct-v0.3",
    "Bloom": "bigscience/bloom",
}

# Initialize clients for models
clients = {name: InferenceClient(repo_id) for name, repo_id in model_options.items()}
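
# Sketch of a one-off, non-streaming call against a single client, handy for
# smoke-testing the token and connectivity (the model key here is an assumption):
#   out = clients["Mistral-7B"].chat_completion(
#       [{"role": "user", "content": "Say hello."}], max_tokens=16
#   )
#   print(out.choices[0].message.content)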

# Define the response function
@spaces.GPU
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_models,
):
    messages = [{"role": "system", "content": system_message}] + history
    messages.append({"role": "user", "content": message})

    responses = {}

    # Generate responses for each selected model
    for model_name in selected_models:
        client = clients[model_name]
        response = ""
        for token in client.chat_completion(
            messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p
        ):
            delta = token.choices[0].delta.content
            if delta:  # final stream chunks may carry no content
                response += delta
        responses[model_name] = response

    return responses
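
# Minimal usage sketch (assumes TOKEN is set and the chosen model is reachable
# via the serverless Inference API):
#   print(respond("What is 2+2?", [], "You are terse.", 64, 0.7, 0.95, ["Mistral-7B"]))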

# Build Gradio app
def create_demo():
    with gr.Blocks() as demo:
        gr.Markdown("# AI Model Comparison Tool 🌟")
        gr.Markdown(
            """
            Compare responses from multiple AI models side-by-side.  
            Select models, ask a question, and vote for the best response!
            """
        )

        with gr.Row():
            system_message = gr.Textbox(
                value="You are a helpful assistant providing answers for technical and customer support queries.",
                label="System message"
            )
            user_message = gr.Textbox(label="Your question", placeholder="Type your question here...")

        with gr.Row():
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
            )

        with gr.Row():
            default_models = ["Llama-3.1-70B", "GPT-4"]  # visible on load
            selected_models = gr.CheckboxGroup(
                choices=list(model_options.keys()),
                label="Select models to compare",
                value=default_models,
            )

        submit_button = gr.Button("Generate Responses")

        with gr.Row():
            # (model_name, column, response_box, vote_button, vote_count) per model
            response_boxes = []

            # Dynamically create a response section for each model
            for model_name in model_options.keys():
                # Columns for the default models start visible; the rest stay hidden
                with gr.Column(visible=(model_name in default_models)) as column:
                    response_box = gr.Textbox(label=f"Response from {model_name}")
                    vote_button = gr.Button(f"Vote for {model_name}")
                    vote_count = gr.Number(value=0, label=f"Votes for {model_name}")
                    response_boxes.append((model_name, column, response_box, vote_button, vote_count))

        # Visibility must be changed by returning updates from a callback;
        # assigning column.visible directly has no effect after render
        def update_model_visibility(models):
            return [gr.update(visible=(name in models)) for name, *_ in response_boxes]

        # Each vote button increments its own counter
        def handle_vote(count):
            return count + 1

        # Generate responses
        def generate_responses(
            message, history, system_message, max_tokens, temperature, top_p, selected_models
        ):
            responses = respond(
                message, history, system_message, max_tokens, temperature, top_p, selected_models
            )
            # One output per response box; blank for models that were not selected
            return [responses.get(name, "") for name, *_ in response_boxes]

        # Gradio inputs must be components; a bare Python list is not accepted,
        # so the (currently empty) chat history lives in State
        history_state = gr.State([])

        submit_button.click(
            generate_responses,
            inputs=[user_message, history_state, system_message, max_tokens, temperature, top_p, selected_models],
            outputs=[response[2] for response in response_boxes],
        )

        for _, _, _, vote_button, vote_count in response_boxes:
            vote_button.click(handle_vote, inputs=vote_count, outputs=vote_count)

        # Update model visibility when the model selection changes
        selected_models.change(
            update_model_visibility,
            inputs=[selected_models],
            outputs=[response[1] for response in response_boxes],
        )

    return demo

if __name__ == "__main__":
    demo = create_demo()
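    # share=True would additionally expose a temporary public URL:
    # demo.launch(share=True)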
    demo.launch()