File size: 7,999 Bytes
1f67d0f
 
 
191e77c
1f67d0f
191e77c
92ec2a2
191e77c
1f67d0f
 
 
a2dcddd
1f67d0f
 
 
 
 
17bb6e0
 
191e77c
1f67d0f
17bb6e0
1f67d0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b87de8
 
1f67d0f
 
 
 
 
 
 
 
191e77c
1f67d0f
 
 
 
 
 
 
 
92ec2a2
191e77c
1f67d0f
 
17bb6e0
92ec2a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b87de8
 
 
191e77c
 
838067a
191e77c
 
9b87de8
 
1f67d0f
92ec2a2
 
 
 
1f67d0f
17bb6e0
9b87de8
 
 
 
92ec2a2
9b87de8
 
92ec2a2
 
 
 
9b87de8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92ec2a2
 
 
 
 
 
 
9b87de8
 
 
 
 
 
1f67d0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# Code adapted from: https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py

import datetime
import os

import gradio as gr
import matplotlib.pyplot as plt
from apscheduler.schedulers.background import BackgroundScheduler
from dotenv import load_dotenv
from huggingface_hub import HfApi

import competitions
import utils

# Shared HTML snippets rendered at the top of the dashboard.
FONT = (
    """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
)
TITLE = """<h1 align="center" id="space-title" class="typewriter">Finetuning Subnet Leaderboard</h1>"""
# Fixed: header previously opened <h2> but closed with </h3>, and used
# "receive" where the singular subject requires "receives".
HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/finetuning" target="_blank">Finetuning</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that rewards miners for producing finetuned models in defined competitions. The model with the best head-to-head score in each competition receives a steady emission of TAO.</h2>"""
EVALUATION_HEADER = """<h3 align="center">Shows the latest per-competition evaluation statistics as calculated by the Taoverse validator</h3>"""

# Hugging Face Space hosting this leaderboard (target of the scheduled restarts).
HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
# Average Bittensor block time, used to convert block counts into wall-clock time.
SECONDS_PER_BLOCK = 12

load_dotenv()

# HF_TOKEN is optional; when absent the HfApi client makes unauthenticated calls.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API = HfApi(token=HF_TOKEN)


def get_next_update_div(current_block: int, next_update_block: int) -> str:
    """Return an HTML div estimating when the next reward update happens.

    Args:
        current_block: The current Bittensor block number.
        next_update_block: The block at which the next reward update occurs.

    Returns:
        An HTML string showing the blocks remaining and the approximate
        wait in whole minutes.
    """
    blocks_to_go = next_update_block - current_block
    # Each block takes SECONDS_PER_BLOCK seconds, so the wait is a simple
    # product — no need to round-trip through datetime arithmetic as before
    # (now + timedelta(...) - now collapses to exactly this value).
    minutes_to_go = int(blocks_to_go * SECONDS_PER_BLOCK // 60)
    return f"""<div align="center" style="font-size: larger;">Next reward update: <b>{blocks_to_go}</b> blocks (~{minutes_to_go} minutes)</div>"""


def get_last_updated_div() -> str:
    """Return an HTML div stamped with the current UTC time.

    Uses the timezone-aware ``datetime.now(timezone.utc)`` instead of the
    deprecated ``datetime.utcnow()`` (deprecated since Python 3.12); the
    rendered string is identical.
    """
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime(
        "%Y-%m-%d %H:%M:%S"
    )
    return f"""<div>Last Updated: {timestamp} (UTC)</div>"""


def restart_space():
    """Restart the hosting Hugging Face Space so it reloads fresh data."""
    API.restart_space(token=HF_TOKEN, repo_id=HF_REPO_ID)


def main():
    """Build and launch the Gradio leaderboard dashboard.

    Loads the current leaderboard state, renders per-competition results,
    optional benchmark plots, and validator stats, then schedules a periodic
    Space restart so the data stays fresh.
    """
    # NOTE(review): the previous comment here claimed an infinite retry loop,
    # but this body does not retry — if retries exist they presumably live
    # inside utils.load_state_vars; verify there.

    state_vars = utils.load_state_vars()
    model_data = state_vars["model_data"]
    vali_runs = state_vars["vali_runs"]
    scores = state_vars["scores"]
    validator_df = state_vars["validator_df"]
    benchmarks_df = state_vars["benchmarks_df"]
    benchmarks_targets = state_vars["benchmarks_targets"]

    demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
    with demo:
        gr.HTML(FONT)
        gr.HTML(TITLE)
        gr.HTML(HEADER)

        # Emission label: one entry per incentivized model, keyed by repo,
        # short commit hash, and rounded TAO emission.
        gr.Label(
            label="Emissions",
            value={
                f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
                for c in model_data
                if c.incentive
            },
            num_top_classes=10,
        )

        # Competitions currently shown on the dashboard.
        comp_ids = [2, 3]
        with gr.Accordion("Competition Results"):
            gr.HTML(EVALUATION_HEADER)
            show_stale = gr.Checkbox(label="Show Stale", interactive=True)
            competition_leaderboards = []
            for comp_id in comp_ids:
                details = competitions.COMPETITION_DETAILS[comp_id]
                with gr.Accordion(f"{details.name} Competition"):
                    gr.HTML(details.html_description)
                    competition_leaderboards.append(
                        gr.components.Dataframe(
                            value=utils.leaderboard_data(
                                model_data, scores, comp_id, show_stale.value
                            ),
                            headers=[
                                "Name",
                                "Win Rate",
                                "Score",
                                "Weight",
                                "UID",
                                "Block",
                            ],
                            datatype=[
                                "markdown",
                                "number",
                                "number",
                                "number",
                                "number",
                                "number",
                            ],
                            elem_id=f"comp{comp_id}-table",
                            interactive=False,
                            visible=True,
                        )
                    )
            gr.HTML(
                """
                    <ul><li><b>Name:</b> the 🤗 Hugging Face repo (click to go to the model card)</li>
                    <li><b>Win Rate:</b> % of head-to-head evals won vs. other eval'd models, given an epsilon advantage or disadvantage</li>
                    <li><b>Score:</b> the combined model score as calculated by the OTF validator (lower is better)</li>
                    <li><b>UID:</b> the Bittensor UID of the miner</li>
                    <li><b>Weight:</b> the bittensor weight set for this model</li>
                    <li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-37/" target="_blank">taostats</a>."""
            )
            # Toggling "Show Stale" recomputes every competition table in one
            # callback; output order matches comp_ids / competition_leaderboards.
            show_stale.change(
                lambda stale: [
                    utils.leaderboard_data(model_data, scores, id, stale)
                    for id in comp_ids
                ],
                inputs=[show_stale],
                outputs=competition_leaderboards,
            )

        if benchmarks_df is not None:

            def create_benchmark_plot(benchmark: str, comp_id: int):
                """Plot one benchmark over time for a competition, with
                dotted reference lines for well-known target models."""
                fig = plt.figure(figsize=(10, 8))

                # Filter to just entries for this competition.
                df = benchmarks_df[benchmarks_df["competition_id"] == comp_id]

                plt.plot(df["timestamp"], df[benchmark])

                # Adding horizontal dotted lines for various benchmark targets (well-known models)
                for model, score in benchmarks_targets[benchmark].items():
                    plt.axhline(y=score, linestyle="--", label=f"{model}")
                    # Label each reference line at the right edge of the plot.
                    plt.text(
                        benchmarks_df["timestamp"].max(),
                        score,
                        f"{model}",
                        va="center",
                        ha="right",
                        backgroundcolor="white",
                    )

                # Adding labels and title
                plt.ylabel(benchmark.upper())
                plt.title(f"{benchmark.upper()} Over Time")
                plt.xticks(rotation=45)

                return fig

            with gr.Accordion("Top Model Benchmarks"):
                for comp_id in comp_ids:
                    details = competitions.COMPETITION_DETAILS[comp_id]
                    with gr.Accordion(f"{details.name} Benchmarks"):
                        mmlu = create_benchmark_plot("mmlu", comp_id)
                        mmlu_pro = create_benchmark_plot("mmlu_pro", comp_id)
                        gr.Plot(mmlu)
                        gr.Plot(mmlu_pro)
                gr.HTML(
                    """<div>Benchmarks computed using <a href='https://github.com/EleutherAI/lm-evaluation-harness'>lm-eval harness</a></div>"""
                )
                gr.HTML(
                    """<ul><li>MMLU: Raw score</li><li>MMLU Pro: Normalized score using <a href='https://huggingface.co/docs/leaderboards/open_llm_leaderboard/normalization'>this</a> method</li></ul>"""
                )

        with gr.Accordion("Validator Stats"):
            gr.components.Dataframe(
                utils.make_validator_dataframe(validator_df, model_data),
                interactive=False,
                visible=True,
            )
        gr.HTML(value=get_last_updated_div())

    # Periodically restart the Space so the dashboard reloads fresh data.
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        restart_space, "interval", seconds=60 * 30
    )  # Restart every 30 minutes (60 * 30 seconds).
    scheduler.start()

    demo.launch()


main()