# Hugging Face Space (status shown when this file was captured: "Running on Zero").
import os

import gradio as gr
from huggingface_hub import login, InferenceClient
import spaces
# Authenticate with Hugging Face API.
# The access token is read from the TOKEN environment variable.
api_key = os.getenv("TOKEN")
if api_key:
    # Only log in when a token is configured: calling login() without a token
    # falls back to an interactive prompt, which cannot be answered on a
    # headless deployment.
    login(api_key)

# Predefined list of models to compare (can be expanded).
# Keys are the display names shown in the UI; values are Hub repo ids.
# NOTE(review): "meta-llama/Llama-3.2-1B" has no "-Instruct" suffix, unlike the
# other Llama entry - confirm it is meant to be used for chat completion.
model_options = {
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Qwen-2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B",
    "DeepSeek-V2.5": "deepseek-ai/DeepSeek-V2.5",
    "Athene-V2-Chat": "Nexusflow/Athene-V2-Chat",
}

# Initialize clients for models: one InferenceClient per display name.
clients = {name: InferenceClient(repo_id) for name, repo_id in model_options.items()}
# Define the response function
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_models,
):
    """Send the same conversation to every selected model and collect the replies.

    Args:
        message: The new user message appended at the end of the conversation.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts.
            ``None`` or an empty list means there are no earlier turns. The
            list passed in is never modified.
        system_message: Content of the system message placed first.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.
        selected_models: Keys of the module-level ``clients`` mapping to
            query, in order. ``None`` or empty yields an empty result.

    Returns:
        A dict mapping each selected model name to its complete response text.

    Raises:
        KeyError: If a selected name has no entry in ``clients``.
    """
    messages = [
        {"role": "system", "content": system_message},
        *(history or []),
        {"role": "user", "content": message},
    ]

    responses = {}
    # Generate responses for each selected model (sequentially).
    for model_name in selected_models or []:
        client = clients[model_name]
        pieces = []
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Some streamed chunks carry no choices at all; skip them.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            # A delta without text (None or "") contributes nothing; adding
            # None to a str would raise TypeError.
            if delta:
                pieces.append(delta)
        responses[model_name] = "".join(pieces)
    return responses
# Build Gradio app
def create_demo():
    """Build the model-comparison UI and return the ``gr.Blocks`` app.

    One response column (response textbox, vote button, vote counter) is
    created per entry in ``model_options`` while the layout is being built.
    Event handlers can only update components that already exist, so the
    model selection toggles the visibility of these columns instead of
    creating components on the fly.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    model_names = list(model_options)
    default_models = ["Llama-3.1-70B", "Qwen-2.5-1.5B-Instruct"]

    def generate_responses(
        message, history, system_message, max_tokens, temperature, top_p, selected_models
    ):
        """Return one response string per model, in ``model_names`` order.

        Models that were not selected get an empty string so that the number
        of returned values always matches the number of output textboxes.
        """
        responses = respond(
            message, history, system_message, max_tokens, temperature, top_p, selected_models
        )
        return [responses.get(name, "") for name in model_names]

    def handle_votes(votes, model_name):
        """Add one vote for ``model_name``; return (all votes, that model's count)."""
        index = model_names.index(model_name)
        # Work on a copy so the list held by the state is never mutated in place.
        updated = list(votes)
        updated[index] += 1
        return updated, updated[index]

    def update_visible_columns(selected):
        """Show the columns of the selected models and hide all others."""
        chosen = set(selected or [])
        return [gr.update(visible=name in chosen) for name in model_names]

    with gr.Blocks() as demo:
        gr.Markdown("# AI Model Comparison Tool π")
        gr.Markdown(
            """
            Compare responses from multiple AI models side-by-side.
            Select models, ask a question, and vote for the best response!
            """
        )

        # Input Section
        with gr.Row():
            system_message = gr.Textbox(
                value="You are a helpful assistant providing answers for technical and customer support queries.",
                label="System message",
            )
            user_message = gr.Textbox(
                label="Your question", placeholder="Type your question here..."
            )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
            )

        # Model Selection Section
        with gr.Row():
            selected_models = gr.CheckboxGroup(
                choices=model_names,
                label="Select models to compare",
                value=default_models,  # Default models
            )

        # Add a button for generating responses
        submit_button = gr.Button("Generate Responses")

        # The history is never written back, so every question is sent
        # without earlier turns.
        history_state = gr.State([])
        # Initialize votes for all models (same order as model_names).
        vote_state = gr.State([0] * len(model_names))

        # Response Section: one column per model, visible only while selected.
        response_boxes = []
        with gr.Row():
            for model_name in model_names:
                with gr.Column(visible=model_name in default_models) as response_column:
                    response_box = gr.Textbox(
                        label=f"Response from {model_name}", interactive=False
                    )
                    vote_button = gr.Button(f"Vote for {model_name}")
                    vote_count = gr.Number(value=0, label=f"Votes for {model_name}")
                response_boxes.append(
                    (model_name, response_column, response_box, vote_button, vote_count)
                )

        # Link button click to generate responses
        submit_button.click(
            generate_responses,
            inputs=[
                user_message,
                history_state,
                system_message,
                max_tokens,
                temperature,
                top_p,
                selected_models,
            ],
            outputs=[entry[2] for entry in response_boxes],
        )

        # Link voting buttons to handle votes
        for model_name, _, _, vote_button, vote_count in response_boxes:
            vote_button.click(
                # Bind the name as a default argument so each button keeps its
                # own model instead of the last value of the loop variable.
                lambda votes, name=model_name: handle_votes(votes, name),
                inputs=[vote_state],
                outputs=[vote_state, vote_count],
            )

        # Update response boxes when models are selected
        selected_models.change(
            update_visible_columns,
            inputs=[selected_models],
            outputs=[entry[1] for entry in response_boxes],
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start serving it when the file is run as a script.
    demo = create_demo()
    demo.launch()