Spaces: Running on Zero
File size: 5,316 Bytes
import os
import gradio as gr
from huggingface_hub import login, InferenceClient
import spaces
# Authenticate with Hugging Face API
api_key = os.getenv("TOKEN")
login(api_key)
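# NOTE: "TOKEN" is expected to be set as a secret in the Space settings;
# without it login() cannot authenticate and gated models will be unavailable.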
# Predefined list of models to compare (can be expanded)
model_options = {
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Qwen-2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B",
    "DeepSeek-V2.5": "deepseek-ai/DeepSeek-V2.5",
    "Athene-V2-Chat": "Nexusflow/Athene-V2-Chat",
}
# Initialize clients for models
clients = {name: InferenceClient(repo_id) for name, repo_id in model_options.items()}
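# Each client targets a single model on the Hugging Face Inference API; a model
# only works here if it is actually served through that API, and the larger
# checkpoints may not be available on the free serverless tier.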
# Define the response function (runs on GPU when ZeroGPU allocates one)
@spaces.GPU
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_models,
):
    """Query each selected model and return {model_name: response_text}."""
    messages = [{"role": "system", "content": system_message}] + history
    messages.append({"role": "user", "content": message})
    responses = {}
    # Stream a response from each selected model
    for model_name in selected_models:
        client = clients[model_name]
        response = ""
        for token in client.chat_completion(
            messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p
        ):
            delta = token.choices[0].delta.content
            if delta:  # the final streamed chunk may carry no content
                response += delta
        responses[model_name] = response
    return responses
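# Illustrative direct call (hypothetical values) for a quick smoke test outside
# Gradio; `history` uses the same {"role": ..., "content": ...} dicts that
# chat_completion expects:
#
#     out = respond(
#         "What is nucleus sampling?", [], "You are a helpful assistant.",
#         max_tokens=128, temperature=0.7, top_p=0.95,
#         selected_models=["Qwen-2.5-1.5B-Instruct"],
#     )
#     print(out["Qwen-2.5-1.5B-Instruct"])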
# Build the Gradio app
def create_demo():
    with gr.Blocks() as demo:
        gr.Markdown("# AI Model Comparison Tool 🌟")
        gr.Markdown(
            """
            Compare responses from multiple AI models side by side.
            Select models, ask a question, and vote for the best response!
            """
        )
        # Input section
        with gr.Row():
            system_message = gr.Textbox(
                value="You are a helpful assistant providing answers for technical and customer support queries.",
                label="System message",
            )
            user_message = gr.Textbox(label="Your question", placeholder="Type your question here...")
        with gr.Row():
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
            )
        # Model selection section
        with gr.Row():
            selected_models = gr.CheckboxGroup(
                choices=list(model_options.keys()),
                label="Select models to compare",
                value=["Llama-3.1-70B", "Qwen-2.5-1.5B-Instruct"],  # default models
            )
        # Response section: Gradio cannot create components from inside an
        # event handler, so one column per model is built up front and then
        # shown or hidden to match the checkbox selection
        response_boxes = []
        with gr.Row():
            for model_name in model_options:
                with gr.Column(visible=model_name in selected_models.value) as response_column:
                    response_box = gr.Textbox(label=f"Response from {model_name}", interactive=False)
                    vote_button = gr.Button(f"Vote for {model_name}")
                    vote_count = gr.Number(value=0, label=f"Votes for {model_name}", interactive=False)
                response_boxes.append((model_name, response_column, response_box, vote_button, vote_count))
        # Add a button for generating responses
        submit_button = gr.Button("Generate Responses")
        vote_state = gr.State([0] * len(model_options))  # one vote counter per model
        # Collect one output string per model column (unselected models get "")
        def generate_responses(
            message, history, system_message, max_tokens, temperature, top_p, selected_models
        ):
            responses = respond(
                message, history, system_message, max_tokens, temperature, top_p, selected_models
            )
            outputs = []
            for model_name, _, response_box, *_ in response_boxes:
                outputs.append(responses.get(model_name, ""))
            return outputs
        # Handle votes: return the updated state plus the count displayed next
        # to the voted-for model (the click handler below wires two outputs)
        def handle_votes(votes, model_name):
            index = list(model_options.keys()).index(model_name)
            votes[index] += 1
            return votes, votes[index]
        # Link the button click to response generation; the inline gr.State([])
        # supplies an empty chat history for each one-shot comparison
        submit_button.click(
            generate_responses,
            inputs=[user_message, gr.State([]), system_message, max_tokens, temperature, top_p, selected_models],
            outputs=[response[2] for response in response_boxes],
        )
# Link voting buttons to handle votes
for model_name, _, _, vote_button, vote_count in response_boxes:
vote_button.click(
lambda votes, name=model_name: handle_votes(votes, name),
inputs=[vote_state],
outputs=[vote_state, vote_count],
)
        # Show or hide each model's column when the selection changes
        def toggle_columns(models):
            return [gr.update(visible=(name in models)) for name, *_ in response_boxes]

        selected_models.change(
            toggle_columns,
            inputs=[selected_models],
            outputs=[response[1] for response in response_boxes],
        )
    return demo
if __name__ == "__main__":
    demo = create_demo()
    demo.launch()
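# A possible variant, assuming default queue settings, to serialize GPU calls
# across concurrent visitors:
#     demo.queue().launch()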