File size: 7,549 Bytes
1f67d0f
 
 
191e77c
1f67d0f
191e77c
 
1f67d0f
 
 
a2dcddd
1f67d0f
 
 
 
 
17bb6e0
 
191e77c
1f67d0f
17bb6e0
1f67d0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191e77c
1f67d0f
 
 
 
 
 
 
 
 
 
d60014f
1f67d0f
 
191e77c
1f67d0f
 
17bb6e0
191e77c
 
 
 
17bb6e0
191e77c
17bb6e0
 
191e77c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17bb6e0
 
 
191e77c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f67d0f
191e77c
1f67d0f
17bb6e0
191e77c
1f67d0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# Code adapted from: https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py

import datetime
import os

import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from dotenv import load_dotenv
from huggingface_hub import HfApi

import competitions
import utils

# Shared HTML snippets used by the dashboard layout in main().
FONT = (
    """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
)
TITLE = """<h1 align="center" id="space-title" class="typewriter">Finetuning Subnet Leaderboard</h1>"""
# Fixed: the header opened with <h2> but closed with </h3> (malformed HTML),
# and "The model ... receive" lacked subject-verb agreement.
HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/finetuning" target="_blank">Finetuning</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that rewards miners for producing finetuned models in defined competitions. The model with the best head-to-head score in each competition receives a steady emission of TAO.</h2>"""
EVALUATION_HEADER = """<h3 align="center">Shows the latest per-competition evaluation statistics as calculated by the Taoverse validator</h3>"""

# Hugging Face repo backing this Space, and the Bittensor block cadence used
# to convert "blocks remaining" into wall-clock time.
HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
SECONDS_PER_BLOCK = 12

# Load environment variables from a local .env file, if present (no-op otherwise).
load_dotenv()

# HF_TOKEN may legitimately be None: the HfApi client then runs unauthenticated.
# A valid write token is required for restart_space() to succeed.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API = HfApi(token=HF_TOKEN)


def get_next_update_div(current_block: int, next_update_block: int) -> str:
    """Render an HTML div estimating when the next reward update happens.

    The estimate converts the number of blocks remaining into minutes using
    the chain's fixed SECONDS_PER_BLOCK cadence.
    """
    remaining_blocks = next_update_block - current_block
    start = datetime.datetime.now()
    eta = start + datetime.timedelta(seconds=remaining_blocks * SECONDS_PER_BLOCK)
    minutes = int((eta - start).total_seconds() // 60)
    return (
        '<div align="center" style="font-size: larger;">'
        f"Next reward update: <b>{remaining_blocks}</b> blocks (~{minutes} minutes)</div>"
    )


def get_last_updated_div() -> str:
    """Render an HTML div showing the current UTC time.

    Uses timezone-aware ``datetime.now(timezone.utc)`` instead of the
    deprecated ``datetime.utcnow()``; the rendered timestamp is identical.
    """
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    return f"""<div>Last Updated: {now_utc.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>"""


def restart_space():
    """Ask the Hugging Face Hub to restart this Space, forcing a data refresh."""
    API.restart_space(token=HF_TOKEN, repo_id=HF_REPO_ID)


def main():
    """Build and launch the Gradio leaderboard dashboard, then schedule a
    periodic Space restart so the displayed data stays fresh."""
    # To avoid leaderboard failures, infinitely try until we get all data
    # needed to populate the dashboard
    # NOTE(review): no retry loop is visible here — the retry-until-success
    # behavior presumably lives inside utils.load_state_vars; confirm.

    state_vars = utils.load_state_vars()
    model_data = state_vars["model_data"]
    vali_runs = state_vars["vali_runs"]
    scores = state_vars["scores"]
    validator_df = state_vars["validator_df"]
    # Benchmarks are optional; the benchmark accordion below is skipped when absent.
    benchmarks = state_vars.get("benchmarks", None)
    benchmark_timestamp = state_vars.get("benchmark_timestamp", None)

    demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
    with demo:
        gr.HTML(FONT)
        gr.HTML(TITLE)
        gr.HTML(HEADER)

        # Incentive-weighted label: only models with a non-zero incentive are shown.
        gr.Label(
            label="Emissions",
            value={
                f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
                for c in model_data
                if c.incentive
            },
            num_top_classes=10,
        )
        if benchmarks is not None:
            with gr.Accordion("Top Model Benchmarks"):
                gr.components.Dataframe(benchmarks)
                gr.HTML("""<div>PPL computed using a stride of 512. See <a href='https://github.com/macrocosm-os/finetuning/blob/dev/scripts/run_benchmarks.py'>here</a> for the full code.</div>""")
                # NOTE(review): assumes benchmark_timestamp is always set when
                # benchmarks is — if not, this raises AttributeError; confirm.
                gr.HTML(f"""<div>Last Updated: {benchmark_timestamp.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>""")

        with gr.Accordion("Competition Results"):
            gr.HTML(EVALUATION_HEADER)
            show_stale = gr.Checkbox(label="Show Stale", interactive=True)
            competition_leaderboards = []
            # Competition 1: ranked by average loss (lower is better).
            losses_1 = utils.get_losses_over_time(vali_runs, 1)
            comp_1 = competitions.COMPETITION_DETAILS[1]
            with gr.Accordion(f"{comp_1.name} Competition"):
                gr.HTML(comp_1.html_description)
                competition_leaderboards.append(gr.components.Dataframe(
                    value=utils.leaderboard_data(model_data, scores, 1, show_stale.value),
                    headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
                    datatype=["markdown", "number", "number", "number", "number", "number"],
                    elem_id="comp1-table",
                    interactive=False,
                    visible=True,
                ))
                gr.LinePlot(
                    losses_1,
                    x="timestamp",
                    x_title="Date",
                    y="losses",
                    y_title="Average Loss",
                    interactive=True,
                    visible=True,
                    width=1024,
                    title="Best Average Loss Over Time",
                )
            # Competition 2: ranked by multiple-choice score (higher is better).
            comp_2 = competitions.COMPETITION_DETAILS[2]
            losses_2 = utils.get_losses_over_time(vali_runs, 2)
            # Convert the losses into % of correct answers (falsy losses map to None).
            losses_2["losses"] = losses_2["losses"].apply(lambda x: 1 - x if x else None)
            with gr.Accordion(f"{comp_2.name} Competition"):
                gr.HTML(comp_2.html_description)
                competition_leaderboards.append(gr.components.Dataframe(
                    value=utils.leaderboard_data(model_data, scores, 2, show_stale.value),
                    headers=["Name", "Win Rate", "MC Score", "Weight", "UID", "Block"],
                    datatype=["markdown", "number", "number", "number", "number", "number"],
                    elem_id="comp2-table",
                    interactive=False,
                    visible=True,
                ))
                gr.LinePlot(
                    losses_2,
                    x="timestamp",
                    x_title="Date",
                    y="losses",
                    y_title="MC Score",
                    interactive=True,
                    visible=True,
                    width=1024,
                    title="Best MC Score Over Time",
                )
            gr.HTML("""
                    <ul><li><b>Name:</b> the 🤗 Hugging Face repo (click to go to the model card)</li>
                    <li><b>Win Rate:</b> % of head-to-head evals won vs. other eval'd models, given an epsilon advantage or disadvantage</li>
                    <li><b>Average Loss:</b> the last loss value on the evaluation data for the model as calculated by the OTF validator (lower is better)</li>
                    <li><b>MC Score:</b> the % of correct multiple choice answers given by the model as calculated by the OTF validator (higher is better)</li>
                    <li><b>UID:</b> the Bittensor UID of the miner</li>
                    <li><b>Weight:</b> the bittensor weight set for this model</li>
                    <li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-37/" target="_blank">taostats</a>.""")
            # Re-query both leaderboards whenever the "Show Stale" toggle flips.
            show_stale.change(
                lambda stale: [utils.leaderboard_data(model_data, scores, 1, stale), utils.leaderboard_data(model_data, scores, 2, stale)],
                inputs=[show_stale],
                outputs=competition_leaderboards,
            )            

        with gr.Accordion("Validator Stats"):
            gr.components.Dataframe(
                utils.make_validator_dataframe(validator_df, model_data),
                interactive=False,
                visible=True,
            )
        gr.HTML(value=get_last_updated_div())

    # Periodically restart the Space so it reloads fresh state.
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        restart_space, "interval", seconds=60 * 30
    )  # restart every 30 minutes
    scheduler.start()

    demo.launch()


# Module-level entry point: Hugging Face Spaces executes this file as a script.
main()