Leaderboard_test

Sleeping

App Files Files Community

Leaderboard_test / app.py

bgamazay

Update app.py

4f8bac4 verified about 2 months ago

raw

history blame

9.94 kB

	import gradio as gr
	import pandas as pd
	import plotly.express as px

	# Label displayed on the citation textbox in the UI.
	CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
	# BibTeX entry offered for copying. Kept as a raw string so the backslash in
	# "\url" is not parsed as the start of a "\uXXXX" escape sequence.
	CITATION_BUTTON_TEXT = r"""@misc{energystarai-leaderboard,
	author = {Sasha Luccioni and Boris Gamazaychikov and Emma Strubell and Sara Hooker and Yacine Jernite and Carole-Jean Wu and Margaret Mitchell},
	title = {AI Energy Score Leaderboard v.0},
	year = {2024},
	publisher = {Hugging Face},
	howpublished = "\url{https://huggingface.co/spaces/EnergyStarAI/2024_Leaderboard}",
	}"""

	# Per-task energy CSV file names, looked up under data/energy/.
	# The order matters for the combined views: when a model appears in several
	# files, the row from the earliest file in this list is the one that is kept.
	tasks = [
	    "asr.csv",
	    "object_detection.csv",
	    "text_classification.csv",
	    "image_captioning.csv",
	    "question_answering.csv",
	    "text_generation.csv",
	    "image_classification.csv",
	    "sentence_similarity.csv",
	    "image_generation.csv",
	    "summarization.csv",
	]

	def format_stars(score):
	    """
	    Render an energy score as a row of green star characters.

	    The score is converted with int() and clamped to the 0-5 range, so a
	    malformed value can never produce a negative or oversized run of stars.
	    Values that cannot be converted (None, NaN, infinities, non-numeric
	    text) yield zero stars instead of raising.

	    Args:
	        score: The energy score, expected to be an integer from 1 to 5.

	    Returns:
	        An HTML <span> string containing one star per score point.
	    """
	    max_stars = 5
	    try:
	        star_count = int(score)
	    except (TypeError, ValueError, OverflowError):
	        # Best effort: an unreadable score is shown as an empty rating.
	        star_count = 0
	    star_count = max(0, min(star_count, max_stars))
	    return f'<span style="color: #3fa45bff; font-size:1.2em;">{"★" * star_count}</span>'

	def make_link(mname):
	    """
	    Turn a model identifier into a markdown link to its Hugging Face page.

	    The link text is the second '/'-separated segment of the identifier
	    ("org/model" is shown as "model"); an identifier without a slash is
	    shown unchanged. The link target always uses the full identifier.
	    """
	    segments = str(mname).split('/')
	    if len(segments) > 1:
	        label = segments[1]
	    else:
	        label = mname
	    return '[{}](https://huggingface.co/{})'.format(label, mname)

	def get_plots(task):
	    """
	    Build the energy scatter plot for a single task.

	    Args:
	        task: File name of the task's CSV inside data/energy/. The file must
	            provide the columns 'model', 'total_gpu_energy' and
	            'energy_score'.

	    Returns:
	        A Plotly figure with one point per CSV row: model on the x-axis,
	        total GPU energy (Wh) on the y-axis, coloured by energy score
	        (1 = red ... 5 = green).
	    """
	    df = pd.read_csv('data/energy/' + task)
	    # Plotly Express applies color_discrete_map only to non-numeric columns;
	    # a numeric column gets a continuous colour scale and the map is ignored.
	    # Validate the score as an integer, then store it as a string category.
	    df['energy_score'] = df['energy_score'].astype(int).astype(str)
	    # Convert kWh to Wh and round to 4 decimal places.
	    df['Total GPU Energy (Wh)'] = (df['total_gpu_energy'] * 1000).round(4)

	    # Five-level colour mapping keyed by the string form of the score.
	    color_map = {
	        "1": "red",
	        "2": "orange",
	        "3": "yellow",
	        "4": "lightgreen",
	        "5": "green",
	    }

	    fig = px.scatter(
	        df,
	        x="model",
	        y="Total GPU Energy (Wh)",
	        custom_data=['energy_score'],
	        height=500,
	        width=800,
	        color="energy_score",
	        color_discrete_map=color_map,
	        # Keep the legend in score order rather than order of appearance.
	        category_orders={"energy_score": list(color_map)},
	    )
	    fig.update_traces(
	        hovertemplate="<br>".join([
	            "Model: %{x}",
	            "Total Energy (Wh): %{y}",
	            "Energy Score: %{customdata[0]}",
	        ])
	    )
	    fig.update_layout(xaxis_title="Model", yaxis_title="Total GPU Energy (Wh)")
	    return fig

	def get_all_plots():
	    """
	    Build one scatter plot covering the models of every task.

	    Each CSV named in the module-level `tasks` list is read from
	    data/energy/ and the rows are combined. When a model appears in more
	    than one file, only its first occurrence (in `tasks` order) is kept.

	    Returns:
	        A Plotly figure with model on the x-axis, total GPU energy (Wh) on
	        the y-axis, coloured by energy score (1 = red ... 5 = green).
	    """
	    # Read every file first and concatenate once; growing a DataFrame inside
	    # the loop re-copies all earlier rows on every iteration.
	    frames = [pd.read_csv('data/energy/' + task) for task in tasks]
	    all_df = pd.concat(frames, ignore_index=True)
	    # Plotly Express applies color_discrete_map only to non-numeric columns;
	    # a numeric column gets a continuous colour scale and the map is ignored.
	    # Validate the score as an integer, then store it as a string category.
	    all_df['energy_score'] = all_df['energy_score'].astype(int).astype(str)
	    # Convert kWh to Wh and round to 4 decimal places.
	    all_df['Total GPU Energy (Wh)'] = (all_df['total_gpu_energy'] * 1000).round(4)
	    all_df = all_df.drop_duplicates(subset=['model'])

	    # Five-level colour mapping keyed by the string form of the score.
	    color_map = {
	        "1": "red",
	        "2": "orange",
	        "3": "yellow",
	        "4": "lightgreen",
	        "5": "green",
	    }
	    fig = px.scatter(
	        all_df,
	        x="model",
	        y="Total GPU Energy (Wh)",
	        custom_data=['energy_score'],
	        height=500,
	        width=800,
	        color="energy_score",
	        color_discrete_map=color_map,
	        # Keep the legend in score order rather than order of appearance.
	        category_orders={"energy_score": list(color_map)},
	    )
	    fig.update_traces(
	        hovertemplate="<br>".join([
	            "Model: %{x}",
	            "Total Energy (Wh): %{y}",
	            "Energy Score: %{customdata[0]}",
	        ])
	    )
	    fig.update_layout(xaxis_title="Model", yaxis_title="Total GPU Energy (Wh)")
	    return fig

	def get_model_names(task):
	    """
	    Build the leaderboard table for one task.

	    Reads data/energy/<task> and returns a dataframe with three columns,
	    ordered by ascending energy use:
	    - Model: markdown link produced by make_link,
	    - Rating: star markup produced by format_stars from energy_score,
	    - Total GPU Energy (Wh): total_gpu_energy multiplied by 1000, rounded
	      to 4 decimal places.
	    """
	    energy_col = 'Total GPU Energy (Wh)'
	    raw = pd.read_csv('data/energy/' + task)

	    # Derive the three display columns straight from the raw CSV columns.
	    scores = raw['energy_score'].astype(int)
	    energy_wh = (raw['total_gpu_energy'] * 1000).round(4)
	    links = raw['model'].apply(make_link)
	    stars = scores.apply(format_stars)

	    table = pd.DataFrame({'Model': links, 'Rating': stars, energy_col: energy_wh})
	    return table.sort_values(by=energy_col)

	def get_all_model_names():
	    """
	    Build the leaderboard table covering the models of every task.

	    Each CSV named in the module-level `tasks` list is read from
	    data/energy/ and the rows are combined. When a model appears in more
	    than one file, only its first occurrence (in `tasks` order) is kept.

	    Returns:
	        A dataframe ordered by ascending energy use with the columns
	        'Model' (markdown link), 'Rating' (star markup) and
	        'Total GPU Energy (Wh)'.
	    """
	    # Read every file first and concatenate once; growing a DataFrame inside
	    # the loop re-copies all earlier rows on every iteration.
	    frames = [pd.read_csv('data/energy/' + task) for task in tasks]
	    all_df = pd.concat(frames, ignore_index=True)
	    all_df['energy_score'] = all_df['energy_score'].astype(int)
	    # Convert kWh to Wh and round to 4 decimal places.
	    all_df['Total GPU Energy (Wh)'] = (all_df['total_gpu_energy'] * 1000).round(4)
	    all_df['Model'] = all_df['model'].apply(make_link)
	    all_df['Rating'] = all_df['energy_score'].apply(format_stars)
	    all_df = all_df.drop_duplicates(subset=['model'])
	    all_df = all_df.sort_values(by='Total GPU Energy (Wh)')
	    return all_df[['Model', 'Rating', 'Total GPU Energy (Wh)']]

	# Build the Gradio interface.
	# NOTE(review): the layout nesting below (tabs, then the citation accordion,
	# then the "Last updated" note, all directly inside the Blocks) was
	# reconstructed from a copy of the file whose indentation was lost; confirm.

	# One entry per task tab, in display order:
	# (tab label, CSV file name, plot column scale, table column scale).
	# Gradio expects integer scales, so the wider plot column of the first tab
	# is expressed as the ratio 13:10 instead of 1.3:1.
	TASK_TABS = [
	    ("Text Generation 💬", 'text_generation.csv', 13, 10),
	    ("Image Generation 📷", 'image_generation.csv', 1, 1),
	    ("Text Classification 🎭", 'text_classification.csv', 1, 1),
	    ("Image Classification 🖼️", 'image_classification.csv', 1, 1),
	    ("Image Captioning 📝", 'image_captioning.csv', 1, 1),
	    ("Summarization 📃", 'summarization.csv', 1, 1),
	    ("Automatic Speech Recognition 💬", 'asr.csv', 1, 1),
	    ("Object Detection 🚘", 'object_detection.csv', 1, 1),
	    ("Sentence Similarity 📚", 'sentence_similarity.csv', 1, 1),
	    ("Extractive QA ❔", 'question_answering.csv', 1, 1),
	]

	demo = gr.Blocks()

	with demo:
	    # The Markdown texts are assembled with explicit newlines so the rendered
	    # content does not depend on how this source file is indented.
	    gr.Markdown(
	        "# AI Energy Score Leaderboard - v.0 (2024) 🌎 💻 🌟\n"
	        "### Welcome to the leaderboard for the [AI Energy Score Project!](https://huggingface.co/EnergyStarAI)\n"
	        "Click through the tasks below to see how different models measure up in terms of energy efficiency."
	    )
	    gr.Markdown(
	        "Test your own models via the [submission portal](https://huggingface.co/spaces/AIEnergyScore/submission_portal)!"
	    )

	    with gr.Tabs():
	        # Per-task tabs: scatter plot on the left, model table on the right.
	        for tab_label, csv_name, plot_scale, table_scale in TASK_TABS:
	            with gr.TabItem(tab_label):
	                with gr.Row():
	                    with gr.Column(scale=plot_scale):
	                        gr.Plot(get_plots(csv_name))
	                    with gr.Column(scale=table_scale):
	                        gr.Dataframe(get_model_names(csv_name), datatype="markdown")

	        # The combined tab passes the functions themselves rather than their
	        # results, exactly as before, leaving the call to Gradio.
	        with gr.TabItem("All Tasks 💡"):
	            with gr.Row():
	                with gr.Column():
	                    gr.Plot(get_all_plots)
	                with gr.Column():
	                    gr.Dataframe(get_all_model_names, datatype="markdown")

	    with gr.Accordion("📙 Citation", open=False):
	        citation_button = gr.Textbox(
	            value=CITATION_BUTTON_TEXT,
	            label=CITATION_BUTTON_LABEL,
	            elem_id="citation-button",
	            lines=10,
	            show_copy_button=True,
	        )
	    gr.Markdown(
	        "Last updated: February 2025"
	    )

	demo.launch()