Spaces:

AIEnergyScore
/

submission_portal

Running

App Files Files Community

submission_portal / app.py

sasha HF Staff

Update app.py

0f89166 verified 9 months ago

raw

history blame

5.75 kB

	import os, glob
	import json
	from datetime import datetime, timezone
	from dataclasses import dataclass
	from datasets import load_dataset, Dataset
	import pandas as pd
	import gradio as gr
	from huggingface_hub import HfApi, snapshot_download, ModelInfo, list_models
	from enum import Enum


	# Namespace prefix used to build the compute Space id below.
	OWNER = "EnergyStarAI"
	# Space that gets restarted once a benchmark request has been queued.
	COMPUTE_SPACE = OWNER + "/launch-computation-example"

	# The Hub access token is read from the environment variable named "DEBUG".
	TOKEN = os.getenv("DEBUG")
	# Single Hub client shared by every API call in this module.
	API = HfApi(token=TOKEN)



	# Task label shown in the dropdown -> tag passed as the `filter` when listing Hub models.
	task_mappings = {
	    "automatic speech recognition": "automatic-speech-recognition",
	    "Object Detection": "object-detection",
	    "Text Classification": "text-classification",
	    "Image to Text": "image-to-text",
	    "Question Answering": "question-answering",
	    "Text Generation": "text-generation",
	    "Image Classification": "image-classification",
	    "Sentence Similarity": "sentence-similarity",
	    "Image Generation": "image-generation",
	    "Summarization": "summarization",
	}

	@dataclass
	class ModelDetails:
	    """Plain record holding a model's name plus an optional display name and symbol."""

	    # Required identifier of the model.
	    name: str
	    # Optional human-readable label; empty by default.
	    display_name: str = ""
	    # emoji
	    symbol: str = ""

	def start_compute_space():
	    """Restart the compute Space and return a confirmation message for the UI."""
	    API.restart_space(COMPUTE_SPACE)
	    confirmation = f"Okay! {COMPUTE_SPACE} should be running now!"
	    return confirmation


	def get_model_size(model_info: "ModelInfo"):
	    """Return the model size in billions of parameters, or 0 when it is unknown.

	    The size is taken from the ``total`` entry of ``model_info.safetensors``,
	    divided by 1e9 and rounded to three decimals.

	    Args:
	        model_info: Hub model metadata. Its ``safetensors`` attribute may be a
	            mapping with a ``"total"`` key or an object exposing ``.total``;
	            both forms are accepted.

	    Returns:
	        The rounded size as a float, or ``0`` when the safetensors metadata is
	        missing or does not carry a numeric total.
	    """
	    safetensors = getattr(model_info, "safetensors", None)
	    if safetensors is None:
	        return 0

	    # Subscripting an object-style value raises TypeError, which previously made
	    # every such model look "unknown"; read the total according to its shape.
	    if isinstance(safetensors, dict):
	        total = safetensors.get("total")
	    else:
	        total = getattr(safetensors, "total", None)

	    try:
	        return round(total / 1e9, 3)
	    except TypeError:
	        # total is None or otherwise non-numeric: unknown sizes are reported as 0.
	        return 0


	def add_new_eval(
	    repo_id: str,
	    task: str,
	):
	    """Queue a benchmark request for a Hub model and start the compute Space.

	    The request is rejected with an explanatory message when the repo id is
	    malformed, the task is unknown, the model already has a COMPLETED request,
	    the model is not listed on the Hub under the chosen task's tag, or its
	    metadata cannot be fetched. Otherwise a PENDING row is appended to the
	    "EnergyStarAI/requests_debug" dataset (split "test"), the dataset is pushed
	    back to the Hub and the compute Space is restarted.

	    Args:
	        repo_id: Model identifier in the form ``user_name/model_name``.
	        task: One of the keys of ``task_mappings``.

	    Returns:
	        A message string for display in the UI: either the reason the request
	        was rejected or the confirmation returned by ``start_compute_space``.
	    """
	    # Validate the id before any network call; a missing "/" used to raise IndexError.
	    repo_id = (repo_id or "").strip()
	    id_parts = repo_id.split("/")
	    if len(id_parts) != 2 or not all(id_parts):
	        return "Please enter the model name in the format user_name/model_name"
	    model_owner = id_parts[0]

	    task_tag = task_mappings.get(task)
	    if task_tag is None:
	        return "Please choose one of the available benchmark tasks"

	    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
	    requests_data = load_dataset("EnergyStarAI/requests_debug", split="test", token=TOKEN)
	    requests_dset = requests_data.to_pandas()

	    completed_models = requests_dset[requests_dset['status'] == 'COMPLETED']['model'].tolist()
	    if repo_id in completed_models:
	        return 'This model has already been run!'

	    # Restrict the listing to the model's owner so the check does not have to
	    # enumerate every Hub model carrying the task tag.
	    is_listed_for_task = any(
	        candidate.id == repo_id
	        for candidate in API.list_models(filter=task_tag, author=model_owner)
	    )
	    if not is_listed_for_task:
	        return "This model isn't compatible with the chosen task! Pick a different model-task combination"

	    # Is the model info correctly filled?
	    try:
	        model_info = API.model_info(repo_id=repo_id)
	    except Exception:
	        # The original message interpolated an undefined name and raised NameError.
	        return f"Could not find information for model {repo_id}"

	    model_size = get_model_size(model_info=model_info)

	    print("Adding request")
	    request_dict = {
	        "model": repo_id,
	        "status": "PENDING",
	        "submitted_time": pd.to_datetime(current_time),
	        "task": task,
	        "likes": model_info.likes,
	        "params": model_size,
	        "leaderboard_version": "v0",
	    }

	    print("Writing out request file to dataset")
	    df_request_dict = pd.DataFrame([request_dict])
	    print(df_request_dict)
	    df_final = pd.concat([requests_dset, df_request_dict], ignore_index=True)
	    updated_dset = Dataset.from_pandas(df_final)
	    updated_dset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)

	    print("Starting compute space at %s " % COMPUTE_SPACE)
	    return start_compute_space()

	def print_existing_models():
	    """Return the 'model' and 'task' columns of every request whose status is COMPLETED."""
	    request_rows = load_dataset(
	        "EnergyStarAI/requests_debug", split="test", token=TOKEN
	    ).to_pandas()
	    is_completed = request_rows["status"] == "COMPLETED"
	    completed_rows = request_rows[is_completed]
	    return completed_rows[["model", "task"]]

	def get_leaderboard_models(path=r'leaderboard_v0_data/energy'):
	    """Return the 'model' and 'task' columns gathered from the leaderboard CSV files.

	    Args:
	        path: Directory whose ``*.csv`` files are read. Defaults to the
	            leaderboard v0 energy data directory.

	    Returns:
	        A DataFrame with the columns ``model`` and ``task``, one row per CSV
	        row, files concatenated in sorted filename order. When the directory
	        holds no CSV file an empty DataFrame with those two columns is
	        returned, because ``pd.concat`` raises on an empty list.
	    """
	    # glob order depends on the filesystem; sort so the table is stable between runs.
	    csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))
	    if not csv_files:
	        return pd.DataFrame(columns=['model', 'task'])

	    frames = [pd.read_csv(csv_file) for csv_file in csv_files]
	    leaderboard_data = pd.concat(frames, ignore_index=True)
	    return leaderboard_data[['model', 'task']]


	# Page layout: headings, the task/model inputs, the submit button with its
	# result area, and two collapsed tables of already-known models.
	with gr.Blocks() as demo:
	    gr.Markdown("# Energy Star Submission Portal - v.0 (2024) 🌎 💻 🌟")
	    gr.Markdown("## ✉️✨ Submit your model here!", elem_classes="markdown-text")
	    gr.Markdown("## Fill out below then click Run Analysis to create the request file and launch the job.")
	    gr.Markdown("## The [Project Leaderboard](https://huggingface.co/spaces/EnergyStarAI/2024_Leaderboard) will be updated quarterly, as new models get submitted.")

	    # Inputs: benchmark task on the left, model repo id on the right.
	    with gr.Row():
	        with gr.Column():
	            task = gr.Dropdown(
	                choices=task_mappings.keys(),
	                label="Choose a benchmark task",
	                value="Text Generation",
	                multiselect=False,
	                interactive=True,
	            )
	        with gr.Column():
	            model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")

	    # Submit button; the message returned by add_new_eval is rendered below it.
	    with gr.Row():
	        with gr.Column():
	            submit_button = gr.Button("Run Analysis")
	            submission_result = gr.Markdown()
	            submit_button.click(
	                fn=add_new_eval,
	                inputs=[model_name_textbox, task],
	                outputs=submission_result,
	            )

	    # Both tables are filled once, while the page is being built.
	    with gr.Row():
	        with gr.Column():
	            with gr.Accordion("Models that are in the latest leaderboard version:", open=False):
	                gr.Dataframe(get_leaderboard_models())
	            with gr.Accordion("Models that have been benchmarked lately:", open=False):
	                gr.Dataframe(print_existing_models())

	demo.launch()