emozilla committed on
Commit
16cb654
·
1 Parent(s): 5efa315

update leaderboard

Browse files
Files changed (1) hide show
  1. app.py +96 -29
app.py CHANGED
@@ -7,7 +7,7 @@ import requests
7
  import wandb
8
  import math
9
  import os
10
- import statistics
11
  import time
12
  from dotenv import load_dotenv
13
  from huggingface_hub import HfApi
@@ -19,14 +19,19 @@ FONT = """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="styles
19
  TITLE = """<h1 align="center" id="space-title" class="typewriter">Subnet 6 Leaderboard</h1>"""
20
  IMAGE = """<a href="https://discord.gg/jqVphNsB4H" target="_blank"><img src="https://i.ibb.co/88wyVQ7/nousgirl.png" alt="nousgirl" style="margin: auto; width: 20%; border: 0;" /></a>"""
21
  HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/NousResearch/finetuning-subnet" target="_blank">Subnet 6</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that incentivizes the creation of the best open models by evaluating submissions on a constant stream of newly generated synthetic GPT-4 data. The models with the best head-to-head loss on the evaluation data receive a steady emission of TAO.</h3>"""
22
- DETAILS = """<b>Name</b> is the 🤗 Hugging Face model name (click to go to the model card). <b>Rewards / Day</b> are the expected rewards per day for each model. <b>Last Average Loss</b> is the last loss value on the evaluation data for the model as calculated by a validator (lower is better). <b>UID</b> is the Bittensor user id of the submitter. <b>Block</b> is the Bittensor block that the model was submitted in. More stats on <a href="https://taostats.io/subnets/netuid-6/" target="_blank">taostats</a>."""
 
23
  VALIDATOR_WANDB_PROJECT = os.environ["VALIDATOR_WANDB_PROJECT"]
24
  H4_TOKEN = os.environ.get("H4_TOKEN", None)
25
  API = HfApi(token=H4_TOKEN)
26
  REPO_ID = "NousResearch/finetuning_subnet_leaderboard"
27
- MAX_AVG_LOSS_POINTS = 5
28
  METAGRAPH_RETRIES = 5
29
  METAGRAPH_DELAY_SECS = 3
 
 
 
 
30
 
31
  def get_subtensor_and_metagraph() -> typing.Tuple[bt.subtensor, bt.metagraph]:
32
  for i in range(0, METAGRAPH_RETRIES):
@@ -113,10 +118,25 @@ def get_subnet_data(subtensor: bt.subtensor, metagraph: bt.metagraph) -> typing.
113
  result.append(model_data)
114
  return result
115
 
116
- def get_avg_loss(uids: typing.List[int]) -> typing.Dict[int, float]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  api = wandb.Api()
118
  runs = list(api.runs(VALIDATOR_WANDB_PROJECT))
119
- runs.reverse()
120
 
121
  result = {}
122
  for run in runs:
@@ -124,50 +144,97 @@ def get_avg_loss(uids: typing.List[int]) -> typing.Dict[int, float]:
124
  for uid in uids:
125
  if uid in result.keys():
126
  continue
127
- key = f"uid_data.{uid}"
128
- if key in history:
129
- data = [float(x) for x in list(history[key]) if (isinstance(x, float) and not math.isnan(x)) or isinstance(x, int) ][-MAX_AVG_LOSS_POINTS:]
130
- if len(data) > 0:
131
- result[uid] = statistics.fmean(data)
 
 
 
 
132
  if len(result.keys()) == len(uids):
133
  break
134
  return result
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  subtensor, metagraph = get_subtensor_and_metagraph()
 
137
 
138
  tao_price = get_tao_price()
139
 
140
  leaderboard_df = get_subnet_data(subtensor, metagraph)
141
  leaderboard_df.sort(key=lambda x: x.incentive, reverse=True)
142
 
143
- losses = get_avg_loss([x.uid for x in leaderboard_df])
144
 
145
- demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
146
- with demo:
147
- gr.HTML(FONT)
148
- gr.HTML(TITLE)
149
- gr.HTML(IMAGE)
150
- gr.HTML(HEADER)
151
- gr.HTML(DETAILS)
 
 
152
 
 
 
 
 
 
 
153
  value = [
154
  [
155
  f'[{c.namespace}/{c.name}](https://huggingface.co/{c.namespace}/{c.name})',
156
- f'${round(c.emission * tao_price, 2):,} (τ{round(c.emission, 2):,})',
157
- f'{round(losses[c.uid], 4) if c.uid in losses.keys() else ""}',
158
  c.uid,
159
  c.block
160
- ] for c in leaderboard_df
161
  ]
162
- value = [x for x in value if x[2] != ""] # filter out anything without a loss
163
- leaderboard_table = gr.components.Dataframe(
164
- value=value,
165
- headers=["Name", "Rewards / Day", "Last Average Loss", "UID", "Block"],
166
- datatype=["markdown", "str", "number", "number", "number"],
167
- elem_id="leaderboard-table",
168
- interactive=False,
169
- visible=True,
 
 
 
 
 
 
170
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  def restart_space():
173
  API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
 
7
  import wandb
8
  import math
9
  import os
10
+ import datetime
11
  import time
12
  from dotenv import load_dotenv
13
  from huggingface_hub import HfApi
 
19
  TITLE = """<h1 align="center" id="space-title" class="typewriter">Subnet 6 Leaderboard</h1>"""
20
  IMAGE = """<a href="https://discord.gg/jqVphNsB4H" target="_blank"><img src="https://i.ibb.co/88wyVQ7/nousgirl.png" alt="nousgirl" style="margin: auto; width: 20%; border: 0;" /></a>"""
21
  HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/NousResearch/finetuning-subnet" target="_blank">Subnet 6</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that incentivizes the creation of the best open models by evaluating submissions on a constant stream of newly generated synthetic GPT-4 data. The models with the best head-to-head loss on the evaluation data receive a steady emission of TAO.</h3>"""
22
+ EVALUATION_DETAILS = """<b>Name</b> is the 🤗 Hugging Face model name (click to go to the model card). <b>Rewards / Day</b> are the expected rewards per day for each model. <b>Last Average Loss</b> is the last loss value on the evaluation data for the model as calculated by a validator (lower is better). <b>UID</b> is the Bittensor user id of the submitter. <b>Block</b> is the Bittensor block that the model was submitted in. More stats on <a href="https://taostats.io/subnets/netuid-6/" target="_blank">taostats</a>."""
23
+ EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by a validator run by Nous Research</h3>"""
24
  VALIDATOR_WANDB_PROJECT = os.environ["VALIDATOR_WANDB_PROJECT"]
25
  H4_TOKEN = os.environ.get("H4_TOKEN", None)
26
  API = HfApi(token=H4_TOKEN)
27
  REPO_ID = "NousResearch/finetuning_subnet_leaderboard"
28
+ MAX_AVG_LOSS_POINTS = 1
29
  METAGRAPH_RETRIES = 5
30
  METAGRAPH_DELAY_SECS = 3
31
+ NETUID = 6
32
+ SUBNET_START_BLOCK = 2225782
33
+ MIN_INCENTIVE_THRESHOLD = 0.01
34
+ SECONDS_PER_BLOCK = 12
35
 
36
  def get_subtensor_and_metagraph() -> typing.Tuple[bt.subtensor, bt.metagraph]:
37
  for i in range(0, METAGRAPH_RETRIES):
 
118
  result.append(model_data)
119
  return result
120
 
121
+ def floatable(x) -> bool:
122
+ return (isinstance(x, float) and not math.isnan(x) and not math.isinf(x)) or isinstance(x, int)
123
+
124
+ def get_float_score(key: str, history) -> typing.Tuple[typing.Optional[float], bool]:
125
+ if key in history:
126
+ data = list(history[key])
127
+ if len(data) > 0:
128
+ if floatable(data[-1]):
129
+ return float(data[-1]), True
130
+ else:
131
+ data = [float(x) for x in data if floatable(x)]
132
+ if len(data) > 0:
133
+ return float(data[-1]), False
134
+ return None, False
135
+
136
+ def get_scores(uids: typing.List[int]) -> typing.Dict[int, typing.Dict[str, typing.Optional[float]]]:
137
  api = wandb.Api()
138
  runs = list(api.runs(VALIDATOR_WANDB_PROJECT))
139
+ print(f"Top validator run: {runs[0].name}")
140
 
141
  result = {}
142
  for run in runs:
 
144
  for uid in uids:
145
  if uid in result.keys():
146
  continue
147
+ avg_loss, avg_loss_fresh = get_float_score(f"uid_data.{uid}", history)
148
+ win_rate, win_rate_fresh = get_float_score(f"win_rate_data.{uid}", history)
149
+ win_total, win_total_fresh = get_float_score(f"win_total_data.{uid}", history)
150
+ result[uid] = {
151
+ "avg_loss": avg_loss,
152
+ "win_rate": win_rate,
153
+ "win_total": win_total,
154
+ "fresh": avg_loss_fresh and win_rate_fresh and win_total_fresh
155
+ }
156
  if len(result.keys()) == len(uids):
157
  break
158
  return result
159
 
160
+ def format_score(uid, scores, key) -> typing.Optional[float]:
161
+ if uid in scores:
162
+ if key in scores[uid]:
163
+ point = scores[uid][key]
164
+ if floatable(point):
165
+ return round(scores[uid][key], 4)
166
+ return None
167
+
168
+ def next_tempo(start_block, tempo, block):
169
+ start_num = start_block + tempo
170
+ intervals = (block - start_num) // tempo
171
+ nearest_num = start_num + ((intervals + 1) * tempo)
172
+ return nearest_num
173
+
174
  subtensor, metagraph = get_subtensor_and_metagraph()
175
+ print_validator_weights(metagraph)
176
 
177
  tao_price = get_tao_price()
178
 
179
  leaderboard_df = get_subnet_data(subtensor, metagraph)
180
  leaderboard_df.sort(key=lambda x: x.incentive, reverse=True)
181
 
182
+ scores = get_scores([x.uid for x in leaderboard_df])
183
 
184
+ current_block = metagraph.block.item()
185
+ next_update = next_tempo(
186
+ SUBNET_START_BLOCK,
187
+ subtensor.get_subnet_hyperparameters(NETUID).tempo,
188
+ current_block
189
+ )
190
+ blocks_to_go = next_update - current_block
191
+ current_time = datetime.datetime.now()
192
+ next_update_time = current_time + datetime.timedelta(seconds=blocks_to_go * SECONDS_PER_BLOCK)
193
 
194
+ def get_next_update():
195
+ now = datetime.datetime.now()
196
+ delta = next_update_time - now
197
+ return f"""<div align="center" style="font-size: larger;">Next reward update: <b>{blocks_to_go}</b> blocks (~{int(delta.total_seconds() // 60)} minutes)</div>"""
198
+
199
+ def leaderboard_data(show_stale: bool):
200
  value = [
201
  [
202
  f'[{c.namespace}/{c.name}](https://huggingface.co/{c.namespace}/{c.name})',
203
+ format_score(c.uid, scores, "win_rate"),
204
+ format_score(c.uid, scores, "avg_loss"),
205
  c.uid,
206
  c.block
207
+ ] for c in leaderboard_df if scores[c.uid]["fresh"] or show_stale
208
  ]
209
+ return value
210
+
211
+ demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
212
+ with demo:
213
+ gr.HTML(FONT)
214
+ gr.HTML(TITLE)
215
+ gr.HTML(IMAGE)
216
+ gr.HTML(HEADER)
217
+
218
+ gr.HTML(value=get_next_update())
219
+
220
+ gr.Label(
221
+ value={ f"{c.namespace}/{c.name} · ${round(c.emission * tao_price, 2):,} (τ{round(c.emission, 2):,})": c.incentive for c in leaderboard_df},
222
+ num_top_classes=10,
223
  )
224
+
225
+ with gr.Accordion("Evaluation Stats"):
226
+ gr.HTML(EVALUATION_HEADER)
227
+ show_stale = gr.Checkbox(label="Show Stale", interactive=True)
228
+ leaderboard_table = gr.components.Dataframe(
229
+ value=leaderboard_data(show_stale.value),
230
+ headers=["Name", "Win Rate", "Average Loss", "UID", "Block"],
231
+ datatype=["markdown", "number", "number", "number", "number", "number"],
232
+ elem_id="leaderboard-table",
233
+ interactive=False,
234
+ visible=True,
235
+ )
236
+ gr.HTML(EVALUATION_DETAILS)
237
+ show_stale.change(leaderboard_data, [show_stale], leaderboard_table)
238
 
239
  def restart_space():
240
  API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)