finetuning_subnet_leaderboard

Runtime error

File size: 7,085 Bytes

import gradio as gr
import bittensor as bt
import typing
from bittensor.extrinsics.serving import get_metadata
from dataclasses import dataclass
import requests
import wandb
import math
import os
import statistics
import time
from dotenv import load_dotenv
from huggingface_hub import HfApi
from apscheduler.schedulers.background import BackgroundScheduler

# Runs before the os.environ lookups further down (VALIDATOR_WANDB_PROJECT, H4_TOKEN);
# presumably this pulls those values from a local .env file when one is present.
load_dotenv()

# Static HTML fragments rendered, in this order, at the top of the leaderboard page.
# The "typewriter" CSS class is styled by the css passed to gr.Blocks.
FONT = """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
TITLE = """<h1 align="center" id="space-title" class="typewriter">Subnet 6 Leaderboard</h1>"""
IMAGE = """<a href="https://discord.gg/jqVphNsB4H" target="_blank"><img src="https://i.ibb.co/88wyVQ7/nousgirl.png" alt="nousgirl" style="margin: auto; width: 20%; border: 0;" /></a>"""
# The heading is opened with <h2>, so it must be closed with </h2>.
HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/NousResearch/finetuning-subnet" target="_blank">Subnet 6</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that incentivizes the creation of the best open models by evaluating submissions on a constant stream of newly generated synthetic GPT-4 data. The models with the best head-to-head loss on the evaluation data receive a steady emission of TAO.</h2>"""
# Explains each column of the leaderboard table.
DETAILS = """<b>Name</b> is the 🤗 Hugging Face model name (click to go to the model card). <b>Rewards / Day</b> are the expected rewards per day for each model. <b>Last Average Loss</b> is the last loss value on the evaluation data for the model as calculated by a validator (lower is better). <b>UID</b> is the Bittensor user id of the submitter. <b>Block</b> is the Bittensor block that the model was submitted in. More stats on <a href="https://taostats.io/subnets/netuid-6/" target="_blank">taostats</a>."""
# Required setting: the W&B project path passed to wandb's Api.runs() in get_avg_loss.
# Indexing os.environ directly means a missing variable raises KeyError at import time.
VALIDATOR_WANDB_PROJECT = os.environ["VALIDATOR_WANDB_PROJECT"]

# Optional Hugging Face token (None when unset); used for the API client and for restart_space.
H4_TOKEN = os.getenv("H4_TOKEN")
API = HfApi(token=H4_TOKEN)

# The Space that restart_space() restarts.
REPO_ID = "NousResearch/finetuning_subnet_leaderboard"

# Number of most recent loss samples averaged per uid in get_avg_loss.
MAX_AVG_LOSS_POINTS = 5

# Retry policy shared by the metagraph fetch and the TAO price lookup.
METAGRAPH_RETRIES = 5
METAGRAPH_DELAY_SECS = 3

def get_subtensor_and_metagraph() -> typing.Tuple[bt.subtensor, bt.metagraph]:
    """Open a subtensor connection to "finney" and fetch the full metagraph for netuid 6.

    The metagraph fetch is attempted up to METAGRAPH_RETRIES times, sleeping
    METAGRAPH_DELAY_SECS seconds after each failed attempt except the last.

    Returns:
        A ``(subtensor, metagraph)`` tuple; the metagraph is requested with ``lite=False``.

    Raises:
        Exception: the error from the final attempt, once all retries are exhausted.
        RuntimeError: if METAGRAPH_RETRIES is less than 1, so no attempt was made.
    """
    subtensor: bt.subtensor = bt.subtensor("finney")
    for attempt in range(METAGRAPH_RETRIES):
        try:
            metagraph: bt.metagraph = subtensor.metagraph(6, lite=False)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit stop the program instead of being retried.
            if attempt == METAGRAPH_RETRIES - 1:
                raise
            time.sleep(METAGRAPH_DELAY_SECS)
        else:
            return subtensor, metagraph
    # Only reachable when the loop body never ran.
    raise RuntimeError("METAGRAPH_RETRIES must be at least 1 to fetch the metagraph")

@dataclass
class ModelData:
    """One miner's model submission together with its on-chain stats."""

    # Bittensor uid and hotkey of the submitter
    uid: int
    hotkey: str
    # Hugging Face "<namespace>/<name>" repository coordinates
    namespace: str
    name: str
    # Either may be None: the literal string "None" in the chain data is mapped to None
    commit: typing.Optional[str]
    hash: typing.Optional[str]
    # Block associated with the commitment metadata
    block: int
    incentive: float
    emission: float

    @classmethod
    def from_compressed_str(cls, uid: int, hotkey: str, cs: str, block: int, incentive: float, emission: float) -> "ModelData":
        """Returns an instance of this class from a compressed string representation

        ``cs`` is expected to look like ``namespace:name:commit:hash``. A commit or
        hash spelled ``None`` becomes ``None``. Tokens after the fourth are ignored.

        Raises:
            IndexError: if ``cs`` contains fewer than four ``:``-separated tokens.
        """
        tokens = cs.split(":")
        # Build through cls so that subclasses get an instance of their own type.
        return cls(
            uid=uid,
            hotkey=hotkey,
            namespace=tokens[0],
            name=tokens[1],
            commit=tokens[2] if tokens[2] != "None" else None,
            hash=tokens[3] if tokens[3] != "None" else None,
            block=block,
            incentive=incentive,
            emission=emission,
        )

def get_tao_price() -> float:
    """Return the ``last`` TAO-USDT price reported by KuCoin's market stats endpoint.

    The request is attempted up to METAGRAPH_RETRIES times, sleeping
    METAGRAPH_DELAY_SECS seconds after each failed attempt except the last.

    Raises:
        Exception: the error from the final attempt (network failure, timeout,
            unexpected payload), once all retries are exhausted.
        RuntimeError: if METAGRAPH_RETRIES is less than 1, so no attempt was made.
    """
    for attempt in range(METAGRAPH_RETRIES):
        try:
            response = requests.get(
                "https://api.kucoin.com/api/v1/market/stats?symbol=TAO-USDT",
                timeout=10,  # seconds; without a timeout a stalled connection blocks startup forever
            )
            return float(response.json()["data"]["last"])
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit stop the program instead of being retried.
            if attempt == METAGRAPH_RETRIES - 1:
                raise
            time.sleep(METAGRAPH_DELAY_SECS)
    # This must sit after the loop: inside the loop body it would fire right
    # after the first failed attempt and the retries would never happen.
    raise RuntimeError("METAGRAPH_RETRIES must be at least 1 to fetch the TAO price")

def print_validator_weights(metagraph: bt.metagraph):
    """Print the weights set by every uid whose validator trust is positive.

    For each such uid a ``uid: N`` header line is printed, followed by one
    indented ``other_uid = weight`` line per other uid whose weight, rounded
    to four decimals, is still greater than zero.
    """
    all_uids = metagraph.uids.tolist()
    for validator_uid in all_uids:
        trust = metagraph.validator_trust[validator_uid].item()
        if not (trust > 0):
            continue

        print(f"uid: {validator_uid}")
        weight_row = metagraph.weights[validator_uid]
        for target_uid in all_uids:
            # A validator's weight on itself is not reported
            if target_uid == validator_uid:
                continue
            rounded = round(weight_row[target_uid].item(), 4)
            if rounded > 0:
                print(f"  {target_uid} = {rounded}")

def get_subnet_data(subtensor: bt.subtensor, metagraph: bt.metagraph) -> typing.List[ModelData]:
    """Collect a ModelData entry for every uid in the metagraph that has usable commitment metadata.

    Uids for which get_metadata returns nothing are skipped. Uids whose
    commitment cannot be decoded or parsed are skipped too, so a single
    malformed on-chain entry does not take the whole leaderboard down.

    Args:
        subtensor: connection passed through to get_metadata.
        metagraph: source of uids, hotkeys, netuid, incentive and emission.

    Returns:
        One ModelData per successfully parsed uid, in metagraph uid order.
    """
    result = []
    for uid in metagraph.uids.tolist():
        hotkey = metagraph.hotkeys[uid]
        metadata = get_metadata(subtensor, metagraph.netuid, hotkey)
        if not metadata:
            continue

        incentive = metagraph.incentive[uid].nan_to_num().item()
        emission = metagraph.emission[uid].nan_to_num().item() * 20  # convert to daily TAO

        try:
            commitment = metadata["info"]["fields"][0]
            # The value under the first key is a hex string; [2:] drops its
            # two-character prefix (presumably "0x" - confirm against chain data).
            hex_data = list(commitment.values())[0][2:]
            chain_str = bytes.fromhex(hex_data).decode()
            model_data = ModelData.from_compressed_str(
                uid, hotkey, chain_str, metadata["block"], incentive, emission
            )
        except (KeyError, IndexError, TypeError, ValueError):
            # Commitments are written by miners and are not guaranteed to be
            # well-formed: missing fields, invalid hex, non-UTF-8 bytes
            # (UnicodeDecodeError is a ValueError) or too few ":" tokens.
            continue

        result.append(model_data)
    return result

def get_avg_loss(uids: typing.List[int]) -> typing.Dict[int, float]:
    """Map each uid to the mean of its most recent logged values in the validator W&B project.

    Runs of VALIDATOR_WANDB_PROJECT are visited in the reverse of the order
    the API lists them. For every uid, the first visited run whose history
    has a ``uid_data.<uid>`` column with at least one usable number wins; the
    last MAX_AVG_LOSS_POINTS usable numbers of that column are averaged.
    Uids with no usable data in any run are absent from the result.
    """
    wandb_api = wandb.Api()
    averages = {}
    for run in reversed(list(wandb_api.runs(VALIDATOR_WANDB_PROJECT))):
        history = run.history()
        for uid in uids:
            if uid in averages:
                continue
            column = f"uid_data.{uid}"
            if column not in history:
                continue
            # Keep ints and non-NaN floats only; anything else in the column is ignored
            usable = [
                float(v)
                for v in list(history[column])
                if isinstance(v, int) or (isinstance(v, float) and not math.isnan(v))
            ]
            recent = usable[-MAX_AVG_LOSS_POINTS:]
            if recent:
                averages[uid] = statistics.fmean(recent)
        # Stop fetching run histories as soon as every requested uid is covered
        if len(averages) == len(uids):
            break
    return averages

# Everything below runs once, at import time; the values are captured by the UI built afterwards.
subtensor, metagraph = get_subtensor_and_metagraph()

# TAO price in USDT, used to show daily rewards in dollars in the table
tao_price = get_tao_price()

# NOTE: despite the name this is a plain list of ModelData (not a DataFrame), sorted by incentive, highest first
leaderboard_df = get_subnet_data(subtensor, metagraph)
leaderboard_df.sort(key=lambda x: x.incentive, reverse=True)

# uid -> average loss; uids without validator data are simply missing from this dict
losses = get_avg_loss([x.uid for x in leaderboard_df])

# Page layout: static banner fragments followed by the leaderboard table.
with gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}") as demo:
    gr.HTML(FONT)
    gr.HTML(TITLE)
    gr.HTML(IMAGE)
    gr.HTML(HEADER)
    gr.HTML(DETAILS)

    # One row per model that has a loss; models without one are left out of the table.
    value = [
        [
            f'[{c.namespace}/{c.name}](https://huggingface.co/{c.namespace}/{c.name})',
            f'${round(c.emission * tao_price, 2):,} (τ{round(c.emission, 2):,})',
            f'{round(losses[c.uid], 4)}',
            c.uid,
            c.block,
        ]
        for c in leaderboard_df
        if c.uid in losses
    ]
    leaderboard_table = gr.components.Dataframe(
        value=value,
        headers=["Name", "Rewards / Day", "Last Average Loss", "UID", "Block"],
        datatype=["markdown", "str", "number", "number", "number"],
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
    )

def restart_space() -> None:
    """Ask the Hugging Face API client to restart the Space REPO_ID, authenticating with H4_TOKEN."""
    API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)

# All leaderboard data is fetched once at import time, so the periodic restart
# is presumably what keeps the page fresh: a restarted Space re-runs this module.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=60 * 15) # restart every 15 minutes
# Start the scheduler before launching the app, since nothing follows demo.launch() in this script
scheduler.start()

demo.launch()