# Code adapted from: https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py
import os
import datetime
from typing import Dict
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import HfApi
from apscheduler.schedulers.background import BackgroundScheduler
import competitions
import utils
FONT = (
"""<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
)
TITLE = """<h1 align="center" id="space-title" class="typewriter">Finetuning Subnet Leaderboard</h1>"""
HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/finetuning" target="_blank">Finetuning</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that rewards miners for producing finetuned models in defined competitions. The model with the best head-to-head score in each competition receive a steady emission of TAO.</h3>"""
EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-37/" target="_blank">taostats</a>."""
EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
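# Bittensor targets roughly one block every 12 seconds, which lets the code
# below convert block deltas into wall-clock time.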
SECONDS_PER_BLOCK = 12
load_dotenv()
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API = HfApi(token=HF_TOKEN)
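# Note: HF_TOKEN is assumed to have write access to HF_REPO_ID; without it,
# the restart_space call below will fail with a permissions error.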
def get_next_update_div(current_block: int, next_update_block: int) -> str:
now = datetime.datetime.now()
blocks_to_go = next_update_block - current_block
next_update_time = now + datetime.timedelta(
seconds=blocks_to_go * SECONDS_PER_BLOCK
)
delta = next_update_time - now
return f"""<div align="center" style="font-size: larger;">Next reward update: <b>{blocks_to_go}</b> blocks (~{int(delta.total_seconds() // 60)} minutes)</div>"""
def get_last_updated_div() -> str:
return f"""<div>Last Updated: {datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>"""
def restart_space():
API.restart_space(repo_id=HF_REPO_ID, token=HF_TOKEN)
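# Restarting the Space forces a full reboot, so main() runs again and
# re-fetches fresh state on every scheduled restart.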
def main():
    # To avoid leaderboard failures, keep retrying until all the data needed
    # to populate the dashboard has been loaded.
state_vars = utils.load_state_vars()
model_data = state_vars["model_data"]
vali_runs = state_vars["vali_runs"]
scores = state_vars["scores"]
validator_df = state_vars["validator_df"]
benchmarks = state_vars.get("benchmarks", None)
benchmark_timestamp = state_vars.get("benchmark_timestamp", None)
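    # model_data feeds the leaderboard rows, vali_runs the loss-over-time
    # plot, and validator_df the validator stats table rendered below.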
demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
with demo:
gr.HTML(FONT)
gr.HTML(TITLE)
gr.HTML(HEADER)
        # TODO: Re-enable once the '"SubtensorModule.BlocksSinceEpoch" not found' issue is resolved.
# gr.HTML(value=get_next_update_div(current_block, next_epoch_block))
# TODO: Figure out the best approach to showing the per competition rewards.
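        # Maps each incentivized model to its incentive share. A rendered key
        # looks like (hypothetical values):
        #   "my-org/my-model (1a2b3c4d) · (τ1,234.56)"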
gr.Label(
value={
f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
for c in model_data
if c.incentive
},
num_top_classes=10,
)
if benchmarks is not None:
with gr.Accordion("Top Model Benchmarks"):
gr.components.Dataframe(benchmarks)
gr.HTML("""<div>PPL computed using a stride of 512. See <a href='https://github.com/macrocosm-os/finetuning/blob/dev/scripts/run_benchmarks.py'>here</a> for the full code.</div>""")
gr.HTML(f"""<div>Last Updated: {benchmark_timestamp.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>""")
with gr.Accordion("Evaluation Stats"):
gr.HTML(EVALUATION_HEADER)
show_stale = gr.Checkbox(label="Show Stale", interactive=True)
competition_leaderboards = []
# TODO: Dynamically generate per-competition leaderboards based on model_data.
competition_details = competitions.COMPETITION_DETAILS[1]
with gr.Accordion(f"{competition_details.name} competition"):
gr.HTML(competition_details.html_description)
competition_leaderboards.append(gr.components.Dataframe(
value=utils.leaderboard_data(model_data, scores, show_stale.value),
headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
datatype=["markdown", "number", "number", "number", "number", "number"],
elem_id="leaderboard-table",
interactive=False,
visible=True,
))
gr.HTML(EVALUATION_DETAILS)
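        # Re-query the leaderboard rows whenever the "Show Stale" checkbox is
        # toggled, pushing the refreshed data into each competition dataframe.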
show_stale.change(
lambda stale: utils.leaderboard_data(model_data, scores, stale),
inputs=[show_stale],
outputs=competition_leaderboards,
)
# TODO: Make this a multi-competition line plot
gr.LinePlot(
utils.get_losses_over_time(vali_runs),
x="timestamp",
x_title="Date",
y="SN9_MODEL",
y_title="Average Loss",
tooltip="SN9_MODEL",
interactive=True,
visible=True,
width=1024,
title="Best Average Loss Over Time",
)
with gr.Accordion("Validator Stats"):
gr.components.Dataframe(
utils.make_validator_dataframe(validator_df, model_data),
interactive=False,
visible=True,
)
gr.HTML(value=get_last_updated_div())
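    # Schedule a periodic restart so the dashboard reloads fresh chain state.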
scheduler = BackgroundScheduler()
    scheduler.add_job(
        restart_space, "interval", seconds=60 * 30
    )  # restart every 30 minutes
scheduler.start()
demo.launch()
if __name__ == "__main__":
    main()