# Spaces: Running
import pandas as pd | |
import numpy as np | |
import gradio as gr | |
from urllib.parse import quote | |
def style_dataframe(df):
    """Return ``df`` wrapped in a Styler with per-cell score highlighting.

    Each cell of the score columns gets a green horizontal gradient whose
    colour stop is the cell's numeric value read as a percentage. Cells
    that cannot be read as a finite number get a flat dark background
    instead. The text colour of the "Model" column is set to ``#4da6ff``.

    Args:
        df: DataFrame that contains a "Model" column and every column
            named in ``highlight_cols``.

    Returns:
        ``df`` itself when it has no rows, otherwise a pandas ``Styler``
        built from it.
    """
    if len(df) == 0:
        return df

    highlight_cols = ["Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO"]
    fallback_css = 'background-color: #121212; color: white;'

    def highlight_green(val):
        """Build the CSS declaration for a single score cell."""
        try:
            # Cells may be strings that carry a "%" or " kg" suffix.
            val_float = float(str(val).replace('%', '').replace(' kg', ''))
        except ValueError:
            return fallback_css
        # float() happily parses "nan" and "inf", which would otherwise be
        # emitted as invalid CSS stops such as "nan%".
        if not np.isfinite(val_float):
            return fallback_css
        return f'background: linear-gradient(90deg, rgba(46, 125, 50, 0.5) {val_float}%, rgba(46, 125, 50, 0.1) {val_float}%); color: white;'

    styled = df.style
    # pandas 2.1 deprecated Styler.applymap in favour of Styler.map; use the
    # new name when it exists and fall back on older pandas versions.
    apply_cellwise = styled.map if hasattr(styled, "map") else styled.applymap
    styled = apply_cellwise(highlight_green, subset=highlight_cols)
    styled = styled.set_properties(
        subset=["Model"],
        **{'color': '#4da6ff'}
    )
    return styled
def create_leaderboard_data():
    """Build the styled leaderboard table from placeholder scores.

    Scores are drawn from a pseudo-random generator seeded with 42 on every
    call, so each call returns the same table. Rows are sorted by "Average"
    in descending order and then ranked 1..N in that order.

    Returns:
        The result of ``style_dataframe`` applied to a DataFrame with the
        columns Rank, Type, Model, Average, IFEval, BBHI, MATH, GPQA, MUJB,
        MMLU-PRO and CO_Cost. Score cells are strings; CO_Cost cells carry
        a " kg" suffix; Model cells are markdown links.
    """
    models = [
        {"model_name": "meta-llama/llama-3-70b-instruct", "type": "open"},
        {"model_name": "mistralai/Mistral-7B-Instruct-v0.3", "type": "open"},
        {"model_name": "google/gemma-7b-it", "type": "open"},
        {"model_name": "Qwen/Qwen2-7B-Instruct", "type": "open"},
        {"model_name": "anthropic/claude-3-opus", "type": "closed"},
        {"model_name": "OpenAI/gpt-4o", "type": "closed"},
        {"model_name": "01-ai/Yi-1.5-34B-Chat", "type": "open"},
        {"model_name": "google/gemma-2b", "type": "open"},
        {"model_name": "microsoft/phi-3-mini-4k-instruct", "type": "open"},
        {"model_name": "microsoft/phi-3-mini-128k-instruct", "type": "open"},
        {"model_name": "stabilityai/stable-beluga-7b", "type": "open"},
        {"model_name": "togethercomputer/RedPajama-INCITE-7B-Instruct", "type": "open"},
        {"model_name": "databricks/dbrx-instruct", "type": "closed"},
        {"model_name": "mosaicml/mpt-7b-instruct", "type": "open"},
        {"model_name": "01-ai/Yi-1.5-9B-Chat", "type": "open"}
    ]
    # (low, high) bounds of the uniform draw for each score column, in the
    # order Average, IFEval, BBHI, MATH, GPQA, MUJB, MMLU-PRO. The draw
    # order matters: it determines which value lands in which column.
    score_bounds = [
        (40, 90),
        (30, 90),
        (40, 85),
        (20, 80),
        (10, 70),
        (10, 70),
        (40, 85),
    ]

    # A private legacy generator seeded with 42 produces the same stream as
    # np.random.seed(42) followed by np.random.uniform, but without
    # resetting NumPy's global random state for the rest of the process.
    rng = np.random.RandomState(42)

    rows = []
    for model in models:
        model_name = model["model_name"]
        model_type = model["type"]
        emoji = "🟢" if model_type.lower() == "open" else "🔴"
        type_with_emoji = f"{emoji} {model_type.upper()}"
        if "/" in model_name:
            # "org/name" identifiers link straight to the model page.
            model_link = f"[{model_name}](https://huggingface.co/{quote(model_name)})"
        else:
            model_link = f"[{model_name}](https://huggingface.co/models?search={quote(model_name)})"
        scores = [round(rng.uniform(low, high), 2) for low, high in score_bounds]
        co2_cost = round(rng.uniform(1, 100), 2)
        rows.append([
            None,  # rank is assigned after sorting
            type_with_emoji,
            model_link,
            *(f"{score}" for score in scores),
            f"{co2_cost} kg",
        ])

    # Index 3 is the "Average" column, stored as a string.
    rows.sort(key=lambda row: float(row[3]), reverse=True)
    for rank, row in enumerate(rows, 1):
        row[0] = rank
    df = pd.DataFrame(rows, columns=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"])
    return style_dataframe(df)
def get_filter_data():
    """Return the quick-filter labels, each mapped to its integer value."""
    labelled_values = (
        ("For Edge Devices", 5),
        ("For Consumers", 4),
        ("Mid-range", 4),
        ("For the GPU-rich", 3),
        ("Only Official Providers", 8),
    )
    return dict(labelled_values)
# Custom stylesheet handed to gr.Blocks(css=css) below. It defines layout
# and typography rules for the classes html-container, dataframe-container,
# leaderboard-title, leaderboard-subtitle and filters-container.
css = """
.html-container {
text-align: center;
display: flex;
justify-content: center;
width: 100%;
}
.dataframe-container {
margin-top: 0.5rem;
margin-bottom: 0.5rem;
}
.leaderboard-title {
font-size: 1.5rem;
font-weight: bold;
margin-bottom: 0.25rem;
color: #f0f0f0;
}
.leaderboard-subtitle {
font-size: 0.9rem;
margin-bottom: 1rem;
color: #a0a0a0;
}
.filters-container {
margin-bottom: 0.5rem;
}
"""
# Checkbox labels take the form "<filter name> · <value>".
filter_data = get_filter_data()
filter_choices = [f"{label} · {value}" for label, value in filter_data.items()]

# (header, datatype, width) for each leaderboard column, left to right.
_TABLE_COLUMNS = [
    ("Rank", "number", "5%"),
    ("Type", "str", "7%"),
    ("Model", "markdown", "35%"),
    ("Average", "str", "7%"),
    ("IFEval", "str", "7%"),
    ("BBHI", "str", "7%"),
    ("MATH", "str", "7%"),
    ("GPQA", "str", "7%"),
    ("MUJB", "str", "7%"),
    ("MMLU-PRO", "str", "7%"),
    ("CO_Cost", "str", "6%"),
]
_table_headers, _table_datatypes, _table_widths = (
    list(values) for values in zip(*_TABLE_COLUMNS)
)

# Page header: title plus one-line subtitle.
_HEADER_HTML = """
<div style="display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
<div class="leaderboard-title">Open LLM Leaderboard</div>
</div>
<div class="leaderboard-subtitle">Comparing Large Language Models in an open and reproducible way</div>
"""
_STATUS_HTML = "<div style='text-align: right; color: #888; font-size: 0.8rem;'>Last updated: June 25, 2024 at 10:30 AM</div>"

with gr.Blocks(css=css) as demo:
    gr.HTML(_HEADER_HTML)

    with gr.Row():
        filters = gr.CheckboxGroup(label="Quick Filters", choices=filter_choices)

    with gr.Row():
        status_text = gr.HTML(_STATUS_HTML)

    # Initial table contents; the button below regenerates them on demand.
    leaderboard_df = create_leaderboard_data()
    leaderboard_table = gr.Dataframe(
        value=leaderboard_df,
        headers=_table_headers,
        datatype=_table_datatypes,
        elem_id="leaderboard-table",
        interactive=False,
        max_height=600,
        show_search="search",
        show_copy_button=True,
        show_fullscreen_button=True,
        pinned_columns=2,
        column_widths=_table_widths,
    )

    refresh_btn = gr.Button("Refresh Data")
    refresh_btn.click(fn=lambda: create_leaderboard_data(), outputs=leaderboard_table)

if __name__ == "__main__":
    demo.launch()