Spaces:
Running
Running
import os, glob | |
import json | |
from datetime import datetime, timezone | |
from dataclasses import dataclass | |
from datasets import load_dataset, Dataset | |
import pandas as pd | |
import gradio as gr | |
from huggingface_hub import HfApi, snapshot_download, ModelInfo, list_models | |
from enum import Enum | |
# Namespace prefix used to build the id of the compute Space.
OWNER = "EnergyStarAI"
COMPUTE_SPACE = OWNER + "/launch-computation-example"

# The Hub token is read from the environment variable named "DEBUG";
# it is None when that variable is not set.
TOKEN = os.getenv("DEBUG")
API = HfApi(token=TOKEN)

# Task labels (used as dropdown choices) mapped to the tag that is passed
# as `filter` to `API.list_models`.
task_mappings = {
    'automatic speech recognition': 'automatic-speech-recognition',
    'Object Detection': 'object-detection',
    'Text Classification': 'text-classification',
    'Image to Text': 'image-to-text',
    'Question Answering': 'question-answering',
    'Text Generation': 'text-generation',
    'Image Classification': 'image-classification',
    'Sentence Similarity': 'sentence-similarity',
    'Image Generation': 'image-generation',
    'Summarization': 'summarization',
}
@dataclass
class ModelDetails:
    """Descriptive fields for a model.

    Declared as a dataclass so the annotated fields become constructor
    arguments; without the decorator ``name`` could never be set through
    ``ModelDetails(...)``.

    Attributes:
        name: Required model name.
        display_name: Optional display label; defaults to an empty string.
        symbol: Optional emoji; defaults to an empty string.
    """

    name: str
    display_name: str = ""
    symbol: str = ""  # emoji
def start_compute_space():
    """Restart the compute Space and report that it was triggered.

    Returns:
        A message string that names the Space which was restarted.
    """
    API.restart_space(COMPUTE_SPACE)
    confirmation = f"Okay! {COMPUTE_SPACE} should be running now!"
    return confirmation
def get_model_size(model_info: "ModelInfo"):
    """Return the model size in billions of parameters, or 0 when unknown.

    The size is read from the ``total`` entry of ``model_info.safetensors``.
    That metadata is accepted both as a mapping (``safetensors["total"]``)
    and as an object exposing a ``total`` attribute, so the lookup does not
    silently degrade to 0 when the metadata is not subscriptable.

    Args:
        model_info: Model metadata object; only its ``safetensors``
            attribute is read.

    Returns:
        ``total / 1e9`` rounded to 3 decimals, or ``0`` when the safetensors
        metadata or its total is missing or not numeric.
    """
    safetensors = getattr(model_info, "safetensors", None)
    if safetensors is None:
        # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
        return 0

    try:
        total = safetensors["total"]
    except (TypeError, KeyError):
        # Not subscriptable, or no "total" key: fall back to attribute access.
        total = getattr(safetensors, "total", None)

    try:
        return round(total / 1e9, 3)
    except TypeError:
        # `total` is None or otherwise non-numeric.
        return 0
def add_new_eval(
    repo_id: str,
    task: str,
):
    """Queue a benchmark request for a model and start the compute Space.

    The request is appended to the "EnergyStarAI/requests_debug" dataset
    (split "test") with status "PENDING", after which the compute Space is
    restarted.

    Args:
        repo_id: Model id in the form ``user_name/model_name``.
        task: Task label; must be a key of ``task_mappings``.

    Returns:
        A message string: either the reason the request was rejected, or the
        confirmation returned by ``start_compute_space``.
    """
    # Reject ids without an owner part up front; previously this surfaced as
    # an IndexError from `repo_id.split("/")[1]`.
    if "/" not in repo_id:
        return "Please enter the model name as user_name/model_name"

    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    requests = load_dataset("EnergyStarAI/requests_debug", split="test", token=TOKEN)
    requests_dset = requests.to_pandas()
    completed_models = requests_dset[requests_dset['status'] == 'COMPLETED']['model'].tolist()
    if repo_id in completed_models:
        return 'This model has already been run!'

    # Listing every model for a task is expensive, so it is only done once
    # the cheaper "already run" check has passed.
    task_model_names = {m.id for m in API.list_models(filter=task_mappings[task])}
    if repo_id not in task_model_names:
        return "This model isn't compatible with the chosen task! Pick a different model-task combination"

    # Is the model info correctly filled?
    try:
        model_info = API.model_info(repo_id=repo_id)
    except Exception:
        # Broad on purpose: any lookup failure is reported to the user as a
        # message instead of crashing the UI callback.
        return f"Could not find information for model {repo_id}"
    model_size = get_model_size(model_info=model_info)

    print("Adding request")
    request_dict = {
        "model": repo_id,
        "status": "PENDING",
        "submitted_time": pd.to_datetime(current_time),
        "task": task,
        "likes": model_info.likes,
        "params": model_size,
        "leaderboard_version": "v0",
    }

    print("Writing out request file to dataset")
    df_request_dict = pd.DataFrame([request_dict])
    print(df_request_dict)
    df_final = pd.concat([requests_dset, df_request_dict], ignore_index=True)
    updated_dset = Dataset.from_pandas(df_final)
    updated_dset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)

    print("Starting compute space at %s " % COMPUTE_SPACE)
    return start_compute_space()
def print_existing_models():
    """Return the model and task columns of all completed requests.

    Loads the "test" split of the "EnergyStarAI/requests_debug" dataset and
    keeps only the rows whose status is "COMPLETED".

    Returns:
        A DataFrame with the columns ``model`` and ``task``.
    """
    frame = load_dataset("EnergyStarAI/requests_debug", split="test", token=TOKEN).to_pandas()
    is_completed = frame['status'] == 'COMPLETED'
    completed = frame[is_completed]
    return completed[['model','task']]
def get_leaderboard_models(path=r'leaderboard_v0_data/energy'):
    """Collect the model/task pairs listed in the leaderboard CSV files.

    Args:
        path: Directory that is searched (non-recursively) for ``*.csv``
            files. Defaults to the leaderboard v0 energy data directory.

    Returns:
        A DataFrame with the columns ``model`` and ``task``, built from all
        CSV files concatenated in sorted filename order. When the directory
        holds no CSV files an empty DataFrame with those two columns is
        returned.
    """
    # Sorted so the row order does not depend on the file system's listing order.
    filenames = sorted(glob.glob(os.path.join(path, "*.csv")))
    if not filenames:
        # pd.concat raises ValueError on an empty list; return an empty table instead.
        return pd.DataFrame(columns=['model', 'task'])
    frames = [pd.read_csv(filename) for filename in filenames]
    leaderboard_data = pd.concat(frames, ignore_index=True)
    return leaderboard_data[['model', 'task']]
# Submission portal UI: a task dropdown and a model-name textbox feed
# `add_new_eval`, whose returned message is rendered below the button.
with gr.Blocks() as demo:
    # NOTE(review): the emoji in the next two headings look mis-encoded;
    # confirm against the intended characters before changing the strings.
    gr.Markdown("# Energy Star Submission Portal - v.0 (2024) π π» π")
    gr.Markdown("## βοΈβ¨ Submit your model here!", elem_classes="markdown-text")
    gr.Markdown("## Fill out below then click **Run Analysis** to create the request file and launch the job.")
    gr.Markdown("## The [Project Leaderboard](https://huggingface.co/spaces/EnergyStarAI/2024_Leaderboard) will be updated quarterly, as new models get submitted.")
    with gr.Row():
        with gr.Column():
            task = gr.Dropdown(
                # A concrete list rather than a dict view.
                choices=list(task_mappings),
                label="Choose a benchmark task",
                value='Text Generation',
                multiselect=False,
                interactive=True,
            )
        with gr.Column():
            model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")
    with gr.Row():
        with gr.Column():
            submit_button = gr.Button("Run Analysis")
            submission_result = gr.Markdown()
            submit_button.click(
                fn=add_new_eval,
                inputs=[
                    model_name_textbox,
                    task,
                ],
                outputs=submission_result,
            )
    with gr.Row():
        with gr.Column():
            with gr.Accordion("Models that are in the latest leaderboard version:", open=False):
                gr.Dataframe(get_leaderboard_models())
            with gr.Accordion("Models that have been benchmarked lately:", open=False):
                # Pass the function itself so the table is recomputed on each
                # page load instead of being frozen at application start-up.
                gr.Dataframe(print_existing_models)
demo.launch()