Spaces:

hmb
/

leaderboard_dataframe

Sleeping

Hannah

not initial

f0ad9ed 4 months ago

5.75 kB

	import pandas as pd
	import numpy as np
	import gradio as gr
	from urllib.parse import quote

	def style_dataframe(df):
	    """Return a pandas Styler that decorates the leaderboard table.

	    Every cell in the score columns gets a green horizontal gradient whose
	    stop position (in percent) is the cell's numeric value; cells that cannot
	    be parsed as a number get a plain dark background instead. The "Model"
	    column text is coloured blue.

	    Args:
	        df: DataFrame to style. Score columns and the "Model" column are
	            styled only when present, so partial frames do not raise.

	    Returns:
	        ``df`` itself when it has no rows, otherwise a Styler built from it.
	    """
	    if len(df) == 0:
	        return df

	    highlight_cols = ["Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO"]

	    def highlight_green(val):
	        """Translate a single cell value into a CSS declaration string."""
	        try:
	            # Drop the '%' and ' kg' suffixes before parsing the number.
	            val_float = float(str(val).replace('%', '').replace(' kg', ''))
	        except (TypeError, ValueError):
	            # Non-numeric cell: fall back to the flat dark background.
	            return 'background-color: #121212; color: white;'
	        return f'background: linear-gradient(90deg, rgba(46, 125, 50, 0.5) {val_float}%, rgba(46, 125, 50, 0.1) {val_float}%); color: white;'

	    styled = df.style

	    # Only style the columns that actually exist in this frame.
	    present_cols = [col for col in highlight_cols if col in df.columns]
	    if present_cols:
	        # Styler.applymap was renamed to Styler.map in pandas 2.1; prefer the
	        # new name and fall back to the old one on earlier versions.
	        apply_per_cell = getattr(styled, "map", None) or styled.applymap
	        styled = apply_per_cell(highlight_green, subset=present_cols)

	    if "Model" in df.columns:
	        styled = styled.set_properties(
	            subset=["Model"],
	            **{'color': '#4da6ff'}
	        )

	    return styled

	def create_leaderboard_data():
	    """Build the mock leaderboard table and return it styled for display.

	    A fixed list of models is paired with pseudo-random benchmark scores.
	    The generator is seeded with 42, so every call produces the same table.
	    Rows are ordered by the "Average" score, highest first, and ranked 1..N.

	    Returns:
	        The result of ``style_dataframe`` applied to a DataFrame with the
	        columns Rank, Type, Model, Average, IFEval, BBHI, MATH, GPQA, MUJB,
	        MMLU-PRO and CO_Cost. All score cells are strings; CO_Cost carries a
	        " kg" suffix and Model is a markdown link.
	    """
	    models = [
	        {"model_name": "meta-llama/llama-3-70b-instruct", "type": "open"},
	        {"model_name": "mistralai/Mistral-7B-Instruct-v0.3", "type": "open"},
	        {"model_name": "google/gemma-7b-it", "type": "open"},
	        {"model_name": "Qwen/Qwen2-7B-Instruct", "type": "open"},
	        {"model_name": "anthropic/claude-3-opus", "type": "closed"},
	        {"model_name": "OpenAI/gpt-4o", "type": "closed"},
	        {"model_name": "01-ai/Yi-1.5-34B-Chat", "type": "open"},
	        {"model_name": "google/gemma-2b", "type": "open"},
	        {"model_name": "microsoft/phi-3-mini-4k-instruct", "type": "open"},
	        {"model_name": "microsoft/phi-3-mini-128k-instruct", "type": "open"},
	        {"model_name": "stabilityai/stable-beluga-7b", "type": "open"},
	        {"model_name": "togethercomputer/RedPajama-INCITE-7B-Instruct", "type": "open"},
	        {"model_name": "databricks/dbrx-instruct", "type": "closed"},
	        {"model_name": "mosaicml/mpt-7b-instruct", "type": "open"},
	        {"model_name": "01-ai/Yi-1.5-9B-Chat", "type": "open"},
	    ]

	    # (low, high) bounds for each score, listed in the order the values are
	    # drawn and in the order they appear in the table.
	    score_ranges = [
	        (40, 90),  # Average
	        (30, 90),  # IFEval
	        (40, 85),  # BBHI
	        (20, 80),  # MATH
	        (10, 70),  # GPQA
	        (10, 70),  # MUJB
	        (40, 85),  # MMLU-PRO
	    ]

	    # A private RandomState seeded with 42 yields the same number stream as
	    # np.random.seed(42) without reseeding NumPy's process-wide generator.
	    rng = np.random.RandomState(42)

	    rows = []
	    for model in models:
	        model_name = model["model_name"]
	        model_type = model["type"]

	        emoji = "🟢" if model_type.lower() == "open" else "🔴"
	        type_with_emoji = f"{emoji} {model_type.upper()}"

	        if "/" in model_name:
	            # "org/name" identifiers link straight to the model page.
	            model_link = f"[{model_name}](https://huggingface.co/{quote(model_name)})"
	        else:
	            # Bare names fall back to a hub search.
	            model_link = f"[{model_name}](https://huggingface.co/models?search={quote(model_name)})"

	        scores = [round(rng.uniform(low, high), 2) for low, high in score_ranges]
	        co2_cost = round(rng.uniform(1, 100), 2)

	        rows.append([
	            0,  # rank placeholder, assigned once the rows are sorted
	            type_with_emoji,
	            model_link,
	            *(f"{score}" for score in scores),
	            f"{co2_cost} kg",
	        ])

	    # Column 3 is "Average"; it is stored as text, so parse it for sorting.
	    rows.sort(key=lambda row: float(row[3]), reverse=True)

	    for rank, row in enumerate(rows, 1):
	        row[0] = rank

	    df = pd.DataFrame(rows, columns=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"])
	    return style_dataframe(df)

	def get_filter_data():
	    """Return the quick-filter labels mapped to their integer values.

	    A fresh dict is built on every call, with the labels in a fixed order.
	    """
	    labels = (
	        "For Edge Devices",
	        "For Consumers",
	        "Mid-range",
	        "For the GPU-rich",
	        "Only Official Providers",
	    )
	    values = (5, 4, 4, 3, 8)
	    return dict(zip(labels, values))

	# Custom stylesheet handed to gr.Blocks(css=css) when the UI is built.
	# .leaderboard-title and .leaderboard-subtitle style the header markup
	# emitted through gr.HTML; the other selectors are not referenced by name
	# elsewhere in this file.
	# NOTE(review): presumably they target Gradio-generated containers - confirm.
	css = """
	.html-container {
	text-align: center;
	display: flex;
	justify-content: center;
	width: 100%;
	}

	.dataframe-container {
	margin-top: 0.5rem;
	margin-bottom: 0.5rem;
	}

	.leaderboard-title {
	font-size: 1.5rem;
	font-weight: bold;
	margin-bottom: 0.25rem;
	color: #f0f0f0;
	}

	.leaderboard-subtitle {
	font-size: 0.9rem;
	margin-bottom: 1rem;
	color: #a0a0a0;
	}

	.filters-container {
	margin-bottom: 0.5rem;
	}
	"""

	# Checkbox labels take the form "<label> · <value>", one per filter entry.
	filter_data = get_filter_data()
	filter_choices = [
	    f"{label} · {count}"
	    for label, count in filter_data.items()
	]

	# Leaderboard table layout, one entry per column: (header, datatype, width).
	_TABLE_COLUMNS = [
	    ("Rank", "number", "5%"),
	    ("Type", "str", "7%"),
	    ("Model", "markdown", "35%"),
	    ("Average", "str", "7%"),
	    ("IFEval", "str", "7%"),
	    ("BBHI", "str", "7%"),
	    ("MATH", "str", "7%"),
	    ("GPQA", "str", "7%"),
	    ("MUJB", "str", "7%"),
	    ("MMLU-PRO", "str", "7%"),
	    ("CO_Cost", "str", "6%"),
	]

	with gr.Blocks(css=css) as demo:
	    # Page header: title followed by a one-line subtitle.
	    gr.HTML("""
	<div style="display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
	<div class="leaderboard-title">Open LLM Leaderboard</div>
	</div>
	<div class="leaderboard-subtitle">Comparing Large Language Models in an open and reproducible way</div>
	""")

	    # Quick-filter checkboxes; no event handler is attached to them here.
	    with gr.Row():
	        filters = gr.CheckboxGroup(
	            choices=filter_choices,
	            label="Quick Filters",
	        )

	    # Right-aligned "last updated" caption (static text).
	    with gr.Row():
	        status_text = gr.HTML("<div style='text-align: right; color: #888; font-size: 0.8rem;'>Last updated: June 25, 2024 at 10:30 AM</div>")

	    # The table and the refresh button sit directly under Blocks.
	    leaderboard_df = create_leaderboard_data()
	    leaderboard_table = gr.Dataframe(
	        value=leaderboard_df,
	        headers=[header for header, _, _ in _TABLE_COLUMNS],
	        datatype=[kind for _, kind, _ in _TABLE_COLUMNS],
	        column_widths=[width for _, _, width in _TABLE_COLUMNS],
	        elem_id="leaderboard-table",
	        interactive=False,
	        max_height=600,
	        pinned_columns=2,
	        show_search="search",
	        show_copy_button=True,
	        show_fullscreen_button=True,
	    )

	    # Clicking the button rebuilds the table and swaps it into the component.
	    refresh_btn = gr.Button("Refresh Data")
	    refresh_btn.click(
	        fn=lambda: create_leaderboard_data(),
	        outputs=leaderboard_table,
	    )

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()