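# NOTE(review): the six lines below look like file-viewer page residue that was
# copied along with the source (not code); kept as comments so the file parses.
# Hannah
# not initial
# f0ad9ed
# raw
# history blame
# 5.75 kB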
import pandas as pd
import numpy as np
import gradio as gr
from urllib.parse import quote
def style_dataframe(df):
    """Wrap the leaderboard frame in a pandas Styler with score colouring.

    Every score column that is present gets a green horizontal bar whose
    width is the cell's numeric value read as a percentage, and the "Model"
    column (when present) is tinted blue.

    Args:
        df: Leaderboard DataFrame. Score cells may be numbers or strings such
            as "58.73", "58.73%" or "12.5 kg"; the "%" and " kg" suffixes are
            stripped before the value is parsed.

    Returns:
        ``df`` itself, unstyled, when it has no rows; otherwise the Styler
        obtained from ``df.style`` with the styling rules registered.
    """
    if len(df) == 0:
        return df

    highlight_cols = ["Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO"]
    fallback_css = 'background-color: #121212; color: white;'

    def highlight_green(val):
        """Return the CSS for one score cell (bar width = score in percent)."""
        try:
            val_float = float(str(val).replace('%', '').replace(' kg', ''))
        except (TypeError, ValueError):
            # Non-numeric cell: plain dark background instead of a bar.
            return fallback_css
        if not np.isfinite(val_float):
            # NaN/inf would otherwise be emitted as an invalid "nan%" stop.
            return fallback_css
        # Gradient stops are percentages, so keep them inside 0-100.
        val_float = min(max(val_float, 0.0), 100.0)
        return f'background: linear-gradient(90deg, rgba(46, 125, 50, 0.5) {val_float}%, rgba(46, 125, 50, 0.1) {val_float}%); color: white;'

    styled = df.style

    # Only style columns that exist; a missing label in `subset` raises
    # KeyError when the Styler is rendered.
    score_cols = [col for col in highlight_cols if col in df.columns]
    if score_cols:
        # Styler.applymap was renamed to Styler.map in pandas 2.1; use the new
        # name when available and fall back on older releases.
        cellwise = "map" if hasattr(styled, "map") else "applymap"
        styled = getattr(styled, cellwise)(highlight_green, subset=score_cols)

    if "Model" in df.columns:
        styled = styled.set_properties(
            subset=["Model"],
            **{'color': '#4da6ff'}
        )
    return styled
def create_leaderboard_data():
    """Build the demo leaderboard table and return it styled for display.

    The scores are placeholder values drawn from a generator seeded with 42,
    so every call produces the same table. Rows are ordered by the "Average"
    column, highest first, and ranked 1..N in that order.

    NOTE: "Average" is drawn independently of the other score columns; it is
    not computed from them.

    Returns:
        The result of ``style_dataframe`` applied to a DataFrame with the
        columns Rank, Type, Model, Average, IFEval, BBHI, MATH, GPQA, MUJB,
        MMLU-PRO and CO_Cost. Scores are stored as strings; CO_Cost carries a
        " kg" suffix; Model is a markdown link.
    """
    models = [
        {"model_name": "meta-llama/llama-3-70b-instruct", "type": "open"},
        {"model_name": "mistralai/Mistral-7B-Instruct-v0.3", "type": "open"},
        {"model_name": "google/gemma-7b-it", "type": "open"},
        {"model_name": "Qwen/Qwen2-7B-Instruct", "type": "open"},
        {"model_name": "anthropic/claude-3-opus", "type": "closed"},
        {"model_name": "OpenAI/gpt-4o", "type": "closed"},
        {"model_name": "01-ai/Yi-1.5-34B-Chat", "type": "open"},
        {"model_name": "google/gemma-2b", "type": "open"},
        {"model_name": "microsoft/phi-3-mini-4k-instruct", "type": "open"},
        {"model_name": "microsoft/phi-3-mini-128k-instruct", "type": "open"},
        {"model_name": "stabilityai/stable-beluga-7b", "type": "open"},
        {"model_name": "togethercomputer/RedPajama-INCITE-7B-Instruct", "type": "open"},
        {"model_name": "databricks/dbrx-instruct", "type": "closed"},
        {"model_name": "mosaicml/mpt-7b-instruct", "type": "open"},
        {"model_name": "01-ai/Yi-1.5-9B-Chat", "type": "open"}
    ]

    # A private generator seeded with 42 yields the same stream as
    # np.random.seed(42) followed by np.random.uniform, without reseeding the
    # process-wide NumPy RNG as a side effect.
    rng = np.random.RandomState(42)

    # (low, high) bounds in draw order: Average, IFEval, BBHI, MATH, GPQA,
    # MUJB, MMLU-PRO. The draw order determines the generated values.
    score_ranges = [(40, 90), (30, 90), (40, 85), (20, 80), (10, 70), (10, 70), (40, 85)]

    entries = []
    for model in models:
        model_name = model["model_name"]
        model_type = model["type"]
        emoji = "🟢" if model_type.lower() == "open" else "🔴"
        type_with_emoji = f"{emoji} {model_type.upper()}"

        if "/" in model_name:
            # "org/name" ids map directly onto a Hub repository URL.
            model_link = f"[{model_name}](https://huggingface.co/{quote(model_name)})"
        else:
            model_link = f"[{model_name}](https://huggingface.co/models?search={quote(model_name)})"

        scores = [round(rng.uniform(low, high), 2) for low, high in score_ranges]
        co2_cost = round(rng.uniform(1, 100), 2)

        cells = [type_with_emoji, model_link]
        cells.extend(f"{score}" for score in scores)
        cells.append(f"{co2_cost} kg")
        # Keep the numeric average beside the formatted cells so sorting does
        # not have to parse the string back into a float.
        entries.append((scores[0], cells))

    # list.sort is stable, so ties keep the order of `models`.
    entries.sort(key=lambda entry: entry[0], reverse=True)
    rows = [[rank, *cells] for rank, (_, cells) in enumerate(entries, 1)]

    df = pd.DataFrame(rows, columns=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"])
    return style_dataframe(df)
def get_filter_data():
    """Return the quick-filter labels mapped to the number shown beside each."""
    labels = (
        "For Edge Devices",
        "For Consumers",
        "Mid-range",
        "For the GPU-rich",
        "Only Official Providers",
    )
    counts = (5, 4, 4, 3, 8)
    # zip pairs each label with its count; dict preserves this order.
    return dict(zip(labels, counts))
# Stylesheet handed to gr.Blocks(css=...) for the page header and containers.
css = """
.html-container {
text-align: center;
display: flex;
justify-content: center;
width: 100%;
}
.dataframe-container {
margin-top: 0.5rem;
margin-bottom: 0.5rem;
}
.leaderboard-title {
font-size: 1.5rem;
font-weight: bold;
margin-bottom: 0.25rem;
color: #f0f0f0;
}
.leaderboard-subtitle {
font-size: 0.9rem;
margin-bottom: 1rem;
color: #a0a0a0;
}
.filters-container {
margin-bottom: 0.5rem;
}
"""

# Quick-filter checkbox labels, rendered as "<label> · <count>".
filter_data = get_filter_data()
filter_choices = []
for label, count in filter_data.items():
    filter_choices.append(f"{label} · {count}")
# Page layout: title/subtitle header, quick-filter checkboxes, a status line,
# the leaderboard table and a refresh button.
with gr.Blocks(css=css) as demo:
    gr.HTML("""
<div style="display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
<div class="leaderboard-title">Open LLM Leaderboard</div>
</div>
<div class="leaderboard-subtitle">Comparing Large Language Models in an open and reproducible way</div>
""")

    with gr.Row():
        # NOTE(review): no event handler is attached to this group in the
        # visible code, so ticking a filter does not change the table.
        filters = gr.CheckboxGroup(
            label="Quick Filters",
            choices=filter_choices,
        )

    with gr.Row():
        status_text = gr.HTML("<div style='text-align: right; color: #888; font-size: 0.8rem;'>Last updated: June 25, 2024 at 10:30 AM</div>")

    # Initial table contents, built once while the layout is constructed.
    leaderboard_df = create_leaderboard_data()
    leaderboard_table = gr.Dataframe(
        value=leaderboard_df,
        headers=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"],
        # "markdown" lets the Model column render its [name](url) links.
        datatype=["number", "str", "markdown", "str", "str", "str", "str", "str", "str", "str", "str"],
        elem_id="leaderboard-table",
        interactive=False,
        max_height=600,
        show_search="search",
        show_copy_button=True,
        show_fullscreen_button=True,
        pinned_columns=2,
        column_widths=["5%", "7%", "35%", "7%", "7%", "7%", "7%", "7%", "7%", "7%", "6%"]
    )

    refresh_btn = gr.Button("Refresh Data")
    # The builder takes no arguments, so it can be passed directly as the
    # handler; its return value replaces the table contents.
    refresh_btn.click(fn=create_leaderboard_data, outputs=leaderboard_table)

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()