"""Gradio front end for an "Open LLM Leaderboard" page backed by a local API server (api.py)."""
import math
import os
import subprocess
import sys
import time
from datetime import datetime
from urllib.parse import quote

import gradio as gr
import pandas as pd
import requests
def style_dataframe(df): | |
if len(df) == 0: | |
return df | |
# Define the columns to highlight based on the screenshot | |
highlight_cols = ["Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO"] | |
# Initialize the styler | |
styled = df.style | |
# Function to create gradient background based on value | |
def highlight_green(val): | |
try: | |
# Extract numeric value from string (remove % if present) | |
val_float = float(str(val).replace('%', '').replace(' kg', '')) | |
# Create gradient background filling based on the value percentage | |
# Use the exact colors from the example | |
return f'background: linear-gradient(90deg, rgba(46, 125, 50, 0.5) {val_float}%, rgba(46, 125, 50, 0.1) {val_float}%); color: white;' | |
except: | |
return 'background-color: #121212; color: white;' | |
# Apply the highlighting to performance metric columns | |
for col in highlight_cols: | |
styled = styled.applymap(highlight_green, subset=[col]) | |
styled = styled.set_properties( | |
subset=["Model"], | |
**{'color': '#4da6ff'} | |
) | |
return styled | |
def increment_counter(counter):
    """Return ``counter`` advanced by one."""
    next_value = counter + 1
    return next_value
def handle_select(evt: gr.SelectData, counter):
    """Return the bumped counter with the event's index and value as text.

    Args:
        evt: Selection event; only its ``index`` and ``value`` are read.
        counter: Current counter value.

    Returns:
        A 3-tuple ``(counter + 1, index_text, value_text)``.
    """
    bumped = counter + 1
    index_text = f"{evt.index}"
    value_text = f"{evt.value}"
    return bumped, index_text, value_text
def start_api_server():
    """Launch ``api.py`` as a child process and return its ``Popen`` handle.

    The child runs under the current interpreter with this file's directory
    as its working directory.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    command = [sys.executable, "api.py"]
    api_process = subprocess.Popen(command, cwd=script_dir)

    # Fixed pause so the server has a moment to come up before first use.
    time.sleep(2)
    return api_process
def apply_filters(filters, models_data):
    """Keep the models that satisfy at least one of the selected quick filters.

    Filters are matched by substring, so labels may carry extra text such as
    a count. The recognised substrings and their rules are:

    * ``"Edge Devices"``  - average score below 45
    * ``"Consumers"``     - average score in [45, 55)
    * ``"Mid-range"``     - average score in [55, 65)
    * ``"GPU-rich"``      - average score of 65 or more
    * ``"Official Providers"`` - model name has no ``/`` or starts with
      ``meta/``, ``google/``, ``openai/`` or ``microsoft/``

    Args:
        filters: Iterable of selected filter labels; ``None`` or empty means
            "no filtering".
        models_data: List of model dicts with optional ``scores`` (holding an
            ``average``) and ``model_name`` entries.

    Returns:
        ``models_data`` unchanged when no filter is selected, otherwise a new
        list holding the matching models in their original order.
    """
    if not filters:
        return models_data

    official_prefixes = ("meta/", "google/", "openai/", "microsoft/")

    def matches(filter_option, average_score, model_name):
        """Return True when one filter label accepts the given model."""
        if "Edge Devices" in filter_option and average_score < 45:
            return True
        if "Consumers" in filter_option and 45 <= average_score < 55:
            return True
        if "Mid-range" in filter_option and 55 <= average_score < 65:
            return True
        if "GPU-rich" in filter_option and average_score >= 65:
            return True
        if "Official Providers" in filter_option:
            return "/" not in model_name or model_name.startswith(official_prefixes)
        return False

    filtered_models = []
    for model in models_data:
        # Treat explicit None values like missing ones so a sparse record
        # cannot raise TypeError in the comparisons above.
        scores = model.get("scores") or {}
        average_score = scores.get("average") or 0
        model_name = model.get("model_name") or ""
        if any(matches(option, average_score, model_name) for option in filters):
            filtered_models.append(model)
    return filtered_models
def format_timestamp(timestamp_str):
    """Format an ISO-8601 timestamp for display, e.g. "January 15, 2024 at 02:30 PM".

    Args:
        timestamp_str: Timestamp text such as ``2024-01-15T14:30:00``, with an
            optional fractional part, UTC offset or trailing ``Z``.

    Returns:
        The formatted string, or ``timestamp_str`` unchanged when it is not a
        string or cannot be parsed (a message is printed in the latter case).
    """
    if not isinstance(timestamp_str, str):
        return timestamp_str

    text = timestamp_str.strip()
    has_zulu_suffix = text.endswith(("Z", "z"))
    # datetime.fromisoformat only understands a trailing "Z" from Python 3.11
    # on, so spell it as an explicit UTC offset.
    iso_text = text[:-1] + "+00:00" if has_zulu_suffix else text

    try:
        dt = datetime.fromisoformat(iso_text)
    except ValueError:
        # Fallback for fractional seconds that fromisoformat rejects on older
        # Python versions (anything other than 3 or 6 digits).
        naive_text = text[:-1] if has_zulu_suffix else text
        try:
            dt = datetime.strptime(naive_text, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError as e:
            print(f"Error formatting timestamp: {e}")
            return timestamp_str

    return dt.strftime("%B %d, %Y at %I:%M %p")
# Column order shared by the populated and the empty leaderboard frames.
_LEADERBOARD_COLUMNS = [
    "Rank", "Type", "Model", "Average", "IFEval", "BBHI",
    "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost",
]

# Keys read from each model's "scores" dict, in column order.
_LEADERBOARD_SCORE_KEYS = ("average", "ifeval", "bbhi", "math", "gpqa", "mujb", "mmlu")


def _empty_leaderboard():
    """Return a row-less frame that still carries the leaderboard columns."""
    return pd.DataFrame(columns=_LEADERBOARD_COLUMNS)


def _model_markdown_link(model):
    """Return a markdown link for one model, preferring the API-supplied URL."""
    model_name = model["model_name"]
    if model.get("model_link"):
        return f"[{model_name}]({model['model_link']})"
    if "/" in model_name:
        # "org/name" identifiers map straight onto a Hugging Face repo page.
        return f"[{model_name}](https://huggingface.co/{quote(model_name)})"
    return f"[{model_name}](https://huggingface.co/models?search={quote(model_name)})"


def _leaderboard_row(rank, model):
    """Turn one model record from the API into a display row."""
    model_type = model["type"]
    scores = model["scores"]
    co2_cost = model.get("co2_cost", "N/A")

    # Green circle for open models, red circle for everything else.
    emoji = "\U0001F7E2" if model_type.lower() == "open" else "\U0001F534"

    row = [rank, f"{emoji} {model_type.upper()}", _model_markdown_link(model)]
    # A missing or None score is shown as 0.00 rather than failing the table.
    row.extend(f"{(scores.get(key) or 0):.2f}" for key in _LEADERBOARD_SCORE_KEYS)
    row.append(f"{co2_cost}" if isinstance(co2_cost, (int, float)) else co2_cost)
    return row


def create_leaderboard_data(selected_filters=None):
    """Fetch the leaderboard from the local API and build the display table.

    Requests ``http://localhost:8000/api/leaderboard``, optionally narrows the
    models with ``apply_filters``, ranks them in the order received (starting
    at 1) and passes the resulting frame through ``style_dataframe``.

    Args:
        selected_filters: Quick-filter labels to apply; falsy means no
            filtering.

    Returns:
        ``(table, updated_at)`` where ``table`` is the styled leaderboard and
        ``updated_at`` is the formatted ``updated_at`` field of the response.
        On a non-200 response or any error the result is an empty frame with
        the leaderboard columns and the string ``"Unknown"``.
    """
    try:
        # Without a timeout an unresponsive API would block the UI forever.
        response = requests.get("http://localhost:8000/api/leaderboard", timeout=10)
        if response.status_code != 200:
            return _empty_leaderboard(), "Unknown"

        data = response.json()
        models_data = data.get("models", [])
        formatted_time = format_timestamp(data.get("updated_at", "Unknown"))

        if selected_filters:
            models_data = apply_filters(selected_filters, models_data)

        rows = [_leaderboard_row(rank, model) for rank, model in enumerate(models_data, 1)]
        df = pd.DataFrame(rows, columns=_LEADERBOARD_COLUMNS)
        return style_dataframe(df), formatted_time
    except Exception as e:
        # Deliberately broad: the UI must still render when the API is down
        # or returns an unexpected payload.
        print(f"Error fetching leaderboard data: {e}")
        return _empty_leaderboard(), "Unknown"
def load_svg(file_path="hf.svg"):
    """Return the text of an SVG file.

    Args:
        file_path: Path to the SVG. If it does not exist as given, it is
            looked up next to this source file, so the default works whatever
            the current working directory is.

    Returns:
        The file's contents decoded as UTF-8.

    Raises:
        OSError: If the file cannot be opened in either location.
    """
    svg_path = file_path
    if not os.path.exists(svg_path):
        module_dir = os.path.dirname(os.path.abspath(__file__))
        svg_path = os.path.join(module_dir, file_path)

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(svg_path, encoding="utf-8") as f:
        return f.read()
def get_filter_data():
    """Return the quick-filter labels, each suffixed with its model count.

    Counts come from ``http://localhost:8000/api/filters``. When the request
    fails, returns a non-200 status, or yields something other than a JSON
    object, every count falls back to 0.

    Returns:
        Five labels of the form ``"<name> <middle dot> <count>"``, always in
        the same order.
    """
    # (label text, key in the API response)
    filter_labels = (
        ("For Edge Devices", "edge_devices"),
        ("For Consumers", "consumers"),
        ("Mid-range", "midrange"),
        ("For the GPU-rich", "gpu_rich"),
        ("Only Official Providers", "official_providers"),
    )
    # Middle dot, written as an escape so it survives re-encoding.
    separator = "\u00b7"

    counts = {}
    try:
        # Without a timeout an unresponsive API would block the UI forever.
        response = requests.get("http://localhost:8000/api/filters", timeout=10)
        if response.status_code == 200:
            payload = response.json()
            if isinstance(payload, dict):
                counts = payload
    except Exception as e:
        # Deliberately broad: the filter row must render even if the API is down.
        print(f"Error fetching filter data: {e}")

    return [f"{label} {separator} {counts.get(key, 0)}" for label, key in filter_labels]
def refresh_leaderboard(selected_filters=None):
    """Ask the API to refresh its data, then rebuild the table and filters.

    Args:
        selected_filters: Currently ticked quick-filter labels, if any.

    Returns:
        ``(table, filters_update, status)``. ``filters_update`` replaces the
        checkbox *choices* with the fresh labels and keeps ticked those that
        were selected before. If the refresh fails, the table and filters are
        left as they are and ``status`` is ``"Error refreshing data"``.
    """

    def stem(label):
        # Labels look like "<name> <separator> <count>"; drop the last two
        # tokens so a changed count still maps onto the same filter.
        return label.rsplit(" ", 2)[0]

    try:
        # Refreshing may take longer than a plain read, hence the larger timeout.
        requests.get("http://localhost:8000/api/leaderboard?refresh=true", timeout=30)
        df, timestamp = create_leaderboard_data(selected_filters)
        filter_choices = get_filter_data()
    except Exception as e:
        print(f"Error refreshing data: {e}")
        # gr.update() with no arguments leaves a component untouched; returning
        # None here would blank the table and clear the filters.
        return gr.update(), gr.update(), "Error refreshing data"

    choice_by_stem = {stem(choice): choice for choice in filter_choices}
    still_selected = [
        choice_by_stem[stem(label)]
        for label in (selected_filters or [])
        if stem(label) in choice_by_stem
    ]
    # A bare list sent to a CheckboxGroup output sets its selected value, which
    # would tick every filter; update the choices explicitly instead.
    filters_update = gr.update(choices=filter_choices, value=still_selected)
    return df, filters_update, f"Last updated: {timestamp}"
def update_table(filters):
    """Rebuild the leaderboard for ``filters`` and return it with a status line.

    Returns:
        ``(table, status)`` where ``status`` reads ``"Last updated: <time>"``.
    """
    table, updated_at = create_leaderboard_data(filters)
    status = f"Last updated: {updated_at}"
    return table, status
def load_css(file_path="style.css"):
    """Return the stylesheet text, or a minimal built-in one if it can't be read.

    Args:
        file_path: Stylesheet path, resolved relative to this source file's
            directory (an absolute path is used as given).

    Returns:
        The file's contents decoded as UTF-8. On any error a message is
        printed and a small fallback rule for ``.dataframe-container`` is
        returned instead.
    """
    try:
        current_dir = os.path.dirname(os.path.abspath(__file__))
        css_path = os.path.join(current_dir, file_path)
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(css_path, encoding="utf-8") as f:
            return f.read()
    except Exception as e:
        # Deliberately broad: a missing or unreadable stylesheet must not
        # stop the app from starting.
        print(f"Error loading CSS file: {e}")
        return """
        .dataframe-container {
            border-radius: 8px;
            box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
        }
        """
# Build the page: logo, title, "last updated" line, quick filters, the
# leaderboard table and a refresh button.
# NOTE(review): nesting below was reconstructed from flattened source —
# confirm which components belong inside each gr.Row.
with gr.Blocks(css=load_css()) as demo:
    # Initial data is fetched while the UI is being constructed, i.e. at
    # import time.
    # NOTE(review): the __main__ guard starts the API server only after this
    # block has run, so this first fetch presumably finds no server unless one
    # was started separately — confirm.
    df, timestamp = create_leaderboard_data()

    with gr.Row():
        svg_content = load_svg()
        gr.HTML(svg_content)

    gr.HTML("""
    <div style="display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
    <div class="leaderboard-title">Open LLM Leaderboard</div>
    </div>
    <div class="leaderboard-subtitle">Comparing Large Language Models in an open and reproducible way</div>
    """)

    # Status line; also the target of the refresh and filter callbacks below.
    status_text = gr.HTML(f"""<div style="text-align: center; margin-bottom: 10px;">Last updated: {timestamp}</div>""")

    with gr.Row(elem_classes="filters-container"):
        filter_choices = get_filter_data()
        filters = gr.CheckboxGroup(
            label="Quick Filters",
            choices=filter_choices,
        )

    # Create and display the dataframe
    leaderboard_table = gr.Dataframe(
        value=df,
        headers=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"],
        # "markdown" lets the Model column render its links.
        datatype=["number", "str", "markdown", "str", "str", "str", "str", "str", "str", "str", "str"],
        elem_id="leaderboard-table",
        elem_classes="dataframe-container",
        interactive=False,
        max_height=600,
        show_search="search",
        show_copy_button=True,
        show_fullscreen_button=True,
        pinned_columns=2,
        column_widths=["5%", "10%", "35%", "7%", "7%", "7%", "7%", "7%", "7%", "7%", "6%"]
    )

    refresh_btn = gr.Button("Refresh Data", elem_classes="refresh-btn")

    # Refresh rewrites the table, the filter group and the status line.
    refresh_btn.click(refresh_leaderboard, inputs=[filters], outputs=[leaderboard_table, filters, status_text])
    # Ticking or unticking a filter rebuilds the table and the status line.
    filters.change(update_table, inputs=[filters], outputs=[leaderboard_table, status_text])
if __name__ == "__main__":
    # Run the API server as a child process for the lifetime of the UI.
    api_process = start_api_server()
    try:
        demo.launch()
    finally:
        # Always stop the child, even when launch() raises or is interrupted,
        # so no orphaned API server is left behind.
        api_process.terminate()
        try:
            api_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            api_process.kill()