Spaces:
Runtime error
Runtime error
File size: 7,999 Bytes
1f67d0f 191e77c 1f67d0f 191e77c 92ec2a2 191e77c 1f67d0f a2dcddd 1f67d0f 17bb6e0 191e77c 1f67d0f 17bb6e0 1f67d0f 9b87de8 1f67d0f 191e77c 1f67d0f 92ec2a2 191e77c 1f67d0f 17bb6e0 92ec2a2 9b87de8 191e77c 838067a 191e77c 9b87de8 1f67d0f 92ec2a2 1f67d0f 17bb6e0 9b87de8 92ec2a2 9b87de8 92ec2a2 9b87de8 92ec2a2 9b87de8 1f67d0f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
# Code adapted from: https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py
import datetime
import os
import gradio as gr
import matplotlib.pyplot as plt
from apscheduler.schedulers.background import BackgroundScheduler
from dotenv import load_dotenv
from huggingface_hub import HfApi
import competitions
import utils
# Static page chrome: a typewriter webfont plus the title/header HTML snippets.
FONT = (
    """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
)
TITLE = """<h1 align="center" id="space-title" class="typewriter">Finetuning Subnet Leaderboard</h1>"""
# NOTE: closing tag fixed from </h3> to </h2> (the element opens with <h2>),
# and "receive" corrected to "receives" (subject is "The model").
HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/finetuning" target="_blank">Finetuning</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that rewards miners for producing finetuned models in defined competitions. The model with the best head-to-head score in each competition receives a steady emission of TAO.</h2>"""
EVALUATION_HEADER = """<h3 align="center">Shows the latest per-competition evaluation statistics as calculated by the Taoverse validator</h3>"""

# Hugging Face repo that hosts this leaderboard space (used to restart it).
HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
# Bittensor block time, used to convert "blocks to go" into wall-clock ETA.
SECONDS_PER_BLOCK = 12
# Load environment variables from a local .env file (no-op if absent).
load_dotenv()
# HF_TOKEN is optional: without it the HfApi client operates anonymously,
# but restart_space() will then lack permission to restart the space.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API = HfApi(token=HF_TOKEN)
def get_next_update_div(current_block: int, next_update_block: int) -> str:
    """Render an HTML div estimating when the next reward update happens.

    Args:
        current_block: The current chain block height.
        next_update_block: The block height at which the next update occurs.

    Returns:
        An HTML snippet showing the blocks remaining and an ETA in minutes.
    """
    blocks_to_go = next_update_block - current_block
    # The ETA is just block count times block time — no need to round-trip
    # through datetime arithmetic (now + delta - now) as before.
    minutes_to_go = int(blocks_to_go * SECONDS_PER_BLOCK // 60)
    return f"""<div align="center" style="font-size: larger;">Next reward update: <b>{blocks_to_go}</b> blocks (~{minutes_to_go} minutes)</div>"""
def get_last_updated_div() -> str:
    """Render an HTML div with the current UTC timestamp for the page footer."""
    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC
    # datetime instead. strftime output is identical because the format
    # string carries no timezone component.
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    return f"""<div>Last Updated: {now_utc.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>"""
def restart_space():
    """Restart this Hugging Face space so it reloads fresh leaderboard data.

    Invoked on a schedule by the BackgroundScheduler set up in main().
    Requires HF_TOKEN to have write access to the space.
    """
    API.restart_space(repo_id=HF_REPO_ID, token=HF_TOKEN)
def main():
    """Build and launch the finetuning leaderboard Gradio app.

    Loads pre-computed state (model metadata, scores, validator stats,
    benchmark history), renders the dashboard, and schedules a periodic
    space restart so the displayed data stays fresh.
    """
    # To avoid leaderboard failures, infinitely try until we get all data
    # needed to populate the dashboard
    state_vars = utils.load_state_vars()
    model_data = state_vars["model_data"]
    scores = state_vars["scores"]
    validator_df = state_vars["validator_df"]
    benchmarks_df = state_vars["benchmarks_df"]
    benchmarks_targets = state_vars["benchmarks_targets"]

    demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
    with demo:
        gr.HTML(FONT)
        gr.HTML(TITLE)
        gr.HTML(HEADER)

        # Top-10 emissions: repo name + short commit hash -> incentive share.
        gr.Label(
            label="Emissions",
            value={
                f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
                for c in model_data
                if c.incentive
            },
            num_top_classes=10,
        )

        # Competitions currently shown on the dashboard.
        comp_ids = [2, 3]
        with gr.Accordion("Competition Results"):
            gr.HTML(EVALUATION_HEADER)
            show_stale = gr.Checkbox(label="Show Stale", interactive=True)
            competition_leaderboards = []
            for comp_id in comp_ids:
                details = competitions.COMPETITION_DETAILS[comp_id]
                with gr.Accordion(f"{details.name} Competition"):
                    gr.HTML(details.html_description)
                    competition_leaderboards.append(
                        gr.components.Dataframe(
                            value=utils.leaderboard_data(
                                model_data, scores, comp_id, show_stale.value
                            ),
                            headers=[
                                "Name",
                                "Win Rate",
                                "Score",
                                "Weight",
                                "UID",
                                "Block",
                            ],
                            datatype=[
                                "markdown",
                                "number",
                                "number",
                                "number",
                                "number",
                                "number",
                            ],
                            elem_id=f"comp{comp_id}-table",
                            interactive=False,
                            visible=True,
                        )
                    )
            gr.HTML(
                """
        <ul><li><b>Name:</b> the 🤗 Hugging Face repo (click to go to the model card)</li>
        <li><b>Win Rate:</b> % of head-to-head evals won vs. other eval'd models, given an epsilon advantage or disadvantage</li>
        <li><b>Score:</b> the combined model score as calculated by the OTF validator (lower is better)</li>
        <li><b>UID:</b> the Bittensor UID of the miner</li>
        <li><b>Weight:</b> the bittensor weight set for this model</li>
        <li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-37/" target="_blank">taostats</a>."""
            )
            # Recompute every competition table when the stale checkbox toggles.
            # (`cid` avoids shadowing the builtin `id`.)
            show_stale.change(
                lambda stale: [
                    utils.leaderboard_data(model_data, scores, cid, stale)
                    for cid in comp_ids
                ],
                inputs=[show_stale],
                outputs=competition_leaderboards,
            )

        if benchmarks_df is not None:

            def create_benchmark_plot(benchmark: str, comp_id: int):
                """Plot one benchmark's score over time for a competition,
                with dotted reference lines for well-known model targets."""
                fig = plt.figure(figsize=(10, 8))

                # Filter to just entries for this competition.
                df = benchmarks_df[benchmarks_df["competition_id"] == comp_id]

                plt.plot(df["timestamp"], df[benchmark])

                # Adding horizontal dotted lines for various benchmark targets (well-known models)
                for model, score in benchmarks_targets[benchmark].items():
                    plt.axhline(y=score, linestyle="--", label=f"{model}")
                    plt.text(
                        benchmarks_df["timestamp"].max(),
                        score,
                        f"{model}",
                        va="center",
                        ha="right",
                        backgroundcolor="white",
                    )

                # Adding labels and title
                plt.ylabel(benchmark.upper())
                plt.title(f"{benchmark.upper()} Over Time")
                plt.xticks(rotation=45)

                return fig

            with gr.Accordion("Top Model Benchmarks"):
                for comp_id in comp_ids:
                    details = competitions.COMPETITION_DETAILS[comp_id]
                    with gr.Accordion(f"{details.name} Benchmarks"):
                        mmlu = create_benchmark_plot("mmlu", comp_id)
                        mmlu_pro = create_benchmark_plot("mmlu_pro", comp_id)
                        gr.Plot(mmlu)
                        gr.Plot(mmlu_pro)
                gr.HTML(
                    """<div>Benchmarks computed using <a href='https://github.com/EleutherAI/lm-evaluation-harness'>lm-eval harness</a></div>"""
                )
                gr.HTML(
                    """<ul><li>MMLU: Raw score</li><li>MMLU Pro: Normalized score using <a href='https://huggingface.co/docs/leaderboards/open_llm_leaderboard/normalization'>this</a> method</li></ul>"""
                )

        with gr.Accordion("Validator Stats"):
            gr.components.Dataframe(
                utils.make_validator_dataframe(validator_df, model_data),
                interactive=False,
                visible=True,
            )
        gr.HTML(value=get_last_updated_div())

    # Periodically restart the space so the dashboard reloads fresh data.
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        restart_space, "interval", seconds=60 * 30
    )  # restart every 30 minutes
    scheduler.start()

    demo.launch()


if __name__ == "__main__":
    main()
|