# Hugging Face Space: AIEnergyScore/submission_portal (status: Running)
# File size: 8,568 Bytes

import os
import json
from datetime import datetime, timezone
from dataclasses import dataclass

import gradio as gr
from huggingface_hub import HfApi 
from huggingface_hub.hf_api import ModelInfo
from enum import Enum


# Hub namespace that owns the repos referenced below.
OWNER = "EnergyStarAI"
# Space that gets restarted once a request has been uploaded.
COMPUTE_SPACE = "/".join((OWNER, "launch-computation-example"))
# Dataset repo that receives the uploaded request files.
REQUESTS_DATASET_PATH = "/".join((OWNER, "requests_debug"))

# Hub access token, read from the environment variable named "DEBUG";
# os.environ.get returns None when that variable is not set.
TOKEN = os.environ.get("DEBUG")
# Hub client shared by the functions below (restart_space, model_info, upload_file).
API = HfApi(token=TOKEN)

# File names offered as the choices of the "Tasks" dropdown in the form.
tasks = [
    f"{task_stem}.csv"
    for task_stem in (
        "asr",
        "object_detection",
        "text_classification",
        "image_captioning",
        "question_answering",
        "text_generation",
        "image_classification",
        "sentence_similarity",
        "image_generation",
        "summarization",
    )
]


##### Data classes needed for the leaderboard Submit Model menu. #####
@dataclass
class ModelDetails:
    """Small record describing one selectable option.

    Instances are used as the values of the ``WeightType``, ``Precision``
    and ``ModelType`` enum members defined in this file.
    """
    # Identifier of the option; the form's dropdowns list these names.
    name: str
    # Optional label; nothing visible in this file sets or reads it.
    display_name: str = ""
    symbol: str = "" # emoji

class WeightType(Enum):
    """Kinds of model weights that can be selected in the submission form."""

    Adapter = ModelDetails(name="Adapter")
    Original = ModelDetails(name="Original")
    Delta = ModelDetails(name="Delta")

class Precision(Enum):
    """Numeric precisions that can be selected in the submission form.

    Every member wraps a ``ModelDetails`` whose ``name`` is the label shown
    to the user. ``Unknown`` is the fallback for unrecognised input.
    """

    float16 = ModelDetails("float16")
    bfloat16 = ModelDetails("bfloat16")
    float32 = ModelDetails("float32")
    # NOTE(review): "bfloat32" does not look like a standard dtype name -
    # confirm it is intended before relying on it.
    bfloat32 = ModelDetails("bfloat32")
    Unknown = ModelDetails("?")

    @staticmethod
    def from_str(precision):
        """Map a precision label to its ``Precision`` member.

        Accepts the bare name (``"float16"``) or the ``torch.``-prefixed
        form (``"torch.float16"``).

        Args:
            precision: Label to look up.

        Returns:
            The matching member, or ``Precision.Unknown`` when the label is
            not recognised (mirrors ``ModelType.from_str``).
        """
        if precision in ("torch.float16", "float16"):
            return Precision.float16
        if precision in ("torch.bfloat16", "bfloat16"):
            return Precision.bfloat16
        if precision in ("torch.bfloat32", "bfloat32"):
            return Precision.bfloat32
        if precision in ("torch.float32", "float32"):
            return Precision.float32
        # Explicit fallback instead of an implicit ``None``.
        return Precision.Unknown
            
class ModelType(Enum):
    """Training categories of a model, each with a name and an emoji symbol."""

    PT = ModelDetails(name="pretrained", symbol="🟢")
    FT = ModelDetails(name="fine-tuned", symbol="🔶")
    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
    RL = ModelDetails(name="RL-tuned", symbol="🟦")
    Unknown = ModelDetails(name="", symbol="?")

    def to_str(self, separator=" "):
        """Return ``<symbol><separator><name>`` for this member."""
        details = self.value
        return f"{details.symbol}{separator}{details.name}"

    @staticmethod
    def from_str(type):
        """Return the member whose name or symbol occurs in ``type``.

        Falls back to ``ModelType.Unknown`` when nothing matches.
        """
        # Order matters: the first entry whose label or emoji is found wins.
        candidates = (
            (ModelType.FT, "fine-tuned", "🔶"),
            (ModelType.PT, "pretrained", "🟢"),
            (ModelType.RL, "RL-tuned", "🟦"),
            (ModelType.IFT, "instruction-tuned", "⭕"),
        )
        for member, label, emoji in candidates:
            if label in type or emoji in type:
                return member
        return ModelType.Unknown

##### End of classes required by the leaderboard Submit Model menu #####



def start_compute_space():
    """Restart the compute Space and return a confirmation message."""
    API.restart_space(COMPUTE_SPACE)
    confirmation = f"Okay! {COMPUTE_SPACE} should be running now!"
    return confirmation


def get_model_size(model_info: "ModelInfo", precision: str):
    """Return the model size in billions of parameters, or 0 when unknown.

    The parameter count is taken from the ``total`` entry of
    ``model_info.safetensors``, read either as a mapping key or as an
    attribute. It is divided by 1e9 and rounded to three decimals. The
    result is multiplied by 8 when ``precision`` is ``"GPTQ"`` or the
    model id contains ``"gptq"`` (case-insensitive).

    Args:
        model_info: Hub metadata of the model; only ``safetensors`` and
            ``modelId`` (falling back to ``id``) are read.
        precision: Precision label of the submission.

    Returns:
        The size as a number, or ``0`` when no usable parameter count is
        available.
    """
    safetensors = getattr(model_info, "safetensors", None)
    try:
        total = safetensors["total"]
    except (TypeError, KeyError, IndexError):
        # Not subscriptable (None / plain object) or no "total" key:
        # try attribute access before giving up.
        total = getattr(safetensors, "total", None)

    try:
        model_size = round(total / 1e9, 3)
    except TypeError:
        return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py

    # A missing or None model id must not crash the GPTQ check.
    model_id = getattr(model_info, "modelId", None) or getattr(model_info, "id", None) or ""
    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_id.lower()) else 1
    return size_factor * model_size


def add_new_eval(
    repo_id: str,
    revision: str,
    precision: str,
    weight_type: str,
    task: str,
):
    """Create an evaluation request for a model, upload it and launch the job.

    Steps:
      1. Validate the form input (model name of the form ``owner/model``
         and a selected task).
      2. Fetch the model metadata from the Hub.
      3. Write the request as JSON to a temporary file, upload it to the
         ``REQUESTS_DATASET_PATH`` dataset repo and delete the local copy.
      4. Restart the compute Space.

    Args:
        repo_id: Model id on the Hub, ``owner/model``.
        revision: Revision to evaluate; empty means ``"main"``.
        precision: Precision label; only the part before the first space
            is kept.
        weight_type: Weight type label, stored in the request as given.
        task: Selected task, stored in the request as given.

    Returns:
        A message for the UI: the confirmation from
        ``start_compute_space()`` on success, otherwise a short description
        of what went wrong.

    Raises:
        Exceptions from writing or uploading the request file, or from
        restarting the Space, are not caught here; the temporary file is
        still removed in that case.
    """
    # Validate before touching the Hub or the filesystem.
    repo_id = (repo_id or "").strip()
    name_parts = repo_id.split("/")
    if len(name_parts) != 2 or not all(name_parts):
        return f'Invalid model name "{repo_id}": expected the form "owner/model".'
    model_owner, model_name = name_parts

    if not task:
        return "Please select a task before running the analysis."

    precision = precision.split(" ")[0]

    # An empty revision falls back to "main"; a non-empty one is honoured
    # rather than being overwritten unconditionally.
    revision = (revision or "").strip() or "main"

    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # NOTE: only the existence of the model is checked here. No base-model,
    # license, model-card or duplicate-submission checks are performed.
    try:
        model_info = API.model_info(repo_id=repo_id, revision=revision)
    except Exception as err:
        print("Could not find information for model %s at revision %s" % (repo_id, revision))
        print("Reason: %s" % (err,))
        return f'Could not find information for model "{repo_id}" at revision "{revision}".'

    model_size = get_model_size(model_info=model_info, precision=precision)

    # Seems good, creating the eval
    print("Adding request")

    request_dict = {
        "model": repo_id,
        "revision": revision,
        "precision": precision,
        "weight_type": weight_type,
        "status": "PENDING",
        "submitted_time": current_time,
        "task": task,
        "likes": model_info.likes,
        "params": model_size,
    }

    out_file = f"{model_name}_eval_request_{precision}_{weight_type}.json"
    temp_out_path = f"./temp/{REQUESTS_DATASET_PATH}/{model_owner}/"
    temp_out_file = f"./temp/{REQUESTS_DATASET_PATH}/{model_owner}/{out_file}"
    print("Making directory to output results at %s" % temp_out_path)
    os.makedirs(temp_out_path, exist_ok=True)

    try:
        print("Writing out temp request file to %s" % temp_out_file)
        with open(temp_out_file, "w", encoding="utf-8") as f:
            json.dump(request_dict, f)

        print("Uploading request to Dataset repo at %s" % REQUESTS_DATASET_PATH)
        API.upload_file(
            path_or_fileobj=temp_out_file,
            path_in_repo=f"{model_owner}/{out_file}",
            repo_id=REQUESTS_DATASET_PATH,
            repo_type="dataset",
            commit_message=f"Adding {model_name} to requests queue.",
        )
    finally:
        # Remove the local file even when writing or uploading failed.
        if os.path.exists(temp_out_file):
            os.remove(temp_out_file)

    print("Starting compute space at %s " % COMPUTE_SPACE)
    return start_compute_space()


        

# Dropdown options come from the enums; Precision.Unknown is not offered.
_precision_choices = [member.value.name for member in Precision if member is not Precision.Unknown]
_weight_type_choices = [member.value.name for member in WeightType]

with gr.Blocks() as demo:
    # Page header and instructions.
    gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
    for _intro_text in (
        "## This is a super basic example 'frontend'.",
        "## Fill out below then click **Run Analysis** to create the request file and launch the job.",
        "### The request file will be written to [datasets/EnergyStarAI/requests_debug](https://huggingface.co/datasets/EnergyStarAI/requests_debug).",
        "### The job will be launched at [EnergyStarAI/launch-computation-example](https://huggingface.co/spaces/EnergyStarAI/launch-computation-example).",
    ):
        gr.Markdown(_intro_text)

    # Submission form: model/revision/task on the left, precision/weights on the right.
    with gr.Row():
        with gr.Column():
            model_name_textbox = gr.Textbox(label="Model name", value="lvwerra/distilbert-imdb")
            revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
            task = gr.Dropdown(choices=tasks, label="Tasks", multiselect=False, interactive=True)

        with gr.Column():
            precision = gr.Dropdown(
                choices=_precision_choices,
                label="Precision",
                multiselect=False,
                value="float16",
                interactive=True,
            )
            weight_type = gr.Dropdown(
                choices=_weight_type_choices,
                label="Weights type",
                multiselect=False,
                value="Original",
                interactive=True,
            )

    # Submit button; the value returned by add_new_eval is rendered below it.
    with gr.Row():
        with gr.Column():
            submit_button = gr.Button("Run Analysis")
            submission_result = gr.Markdown()
            # Same order as the parameters of add_new_eval.
            _form_inputs = [
                model_name_textbox,
                revision_name_textbox,
                precision,
                weight_type,
                task,
            ]
            submit_button.click(fn=add_new_eval, inputs=_form_inputs, outputs=submission_result)

demo.launch()