import gradio as gr
import bittensor as bt
import typing
from bittensor.extrinsics.serving import get_metadata
from dataclasses import dataclass
import requests
import wandb
import math
import os
import statistics
from dotenv import load_dotenv
from huggingface_hub import HfApi
from apscheduler.schedulers.background import BackgroundScheduler
# Load variables from a local .env file (if present) into the process
# environment before the os.environ lookups further down read them.
load_dotenv()
# Static page copy; each constant is rendered with gr.HTML at the top of the
# leaderboard, in the order TITLE, IMAGE, HEADER, DETAILS.
TITLE = """
Subnet 6 Leaderboard
"""
# Currently empty; still rendered so the page layout keeps its slot.
IMAGE = """"""
HEADER = """Subnet 6 is a Bittensor subnet that incentivizes the creation of the best open models by evaluating submissions on a constant stream of newly generated synthetic GPT-4 data. The models with the best head-to-head loss on the evaluation data receive a steady emission of TAO."""
# Explains the table columns ("Name", "Rewards / Day", "Last Average Loss", "UID").
DETAILS = """Name is the 🤗 Hugging Face model name (click to go to the model card). Rewards / Day are the expected rewards per day for each model. Last Average Loss is the last loss value on the evaluation data for the model as calculated by a validator (lower is better). UID is the Bittensor user id of the submitter. More stats on taostats."""
# W&B project whose runs get_avg_loss() reads. Required: a missing variable
# raises KeyError at import time.
VALIDATOR_WANDB_PROJECT = os.environ["VALIDATOR_WANDB_PROJECT"]
# Hugging Face access token; optional (None when the variable is unset).
H4_TOKEN = os.environ.get("H4_TOKEN", None)
# Hub client used by restart_space().
API = HfApi(token=H4_TOKEN)
# Repository id of the Space that restart_space() restarts.
REPO_ID = "NousResearch/finetuning_subnet_leaderboard"
# get_avg_loss() averages at most this many trailing non-NaN loss points per UID.
MAX_AVG_LOSS_POINTS = 5
# Chain connection (network "finney") and the metagraph for netuid 6.
# NOTE(review): lite=False is presumably needed so metagraph.weights is
# populated for print_validator_weights() — confirm against bittensor docs.
subtensor = bt.subtensor("finney")
metagraph: bt.metagraph = subtensor.metagraph(6, lite=False)
@dataclass
class ModelData:
    """One miner's model submission together with its metagraph statistics.

    The model identity (namespace, name, commit, hash) comes from the
    colon-separated string a miner commits on chain; the remaining fields
    are supplied by the caller.
    """

    uid: int  # Bittensor UID of the submitter
    hotkey: str  # hotkey registered for that UID
    namespace: str  # 1st token; Hugging Face namespace (used to build the model link)
    name: str  # 2nd token; Hugging Face model name
    commit: typing.Optional[str]  # 3rd token; None when the token is the literal "None"
    hash: typing.Optional[str]  # 4th token; None when the token is the literal "None"
    block: int  # block value taken from the commitment metadata
    incentive: float
    emission: float

    @classmethod
    def from_compressed_str(cls, uid: int, hotkey: str, cs: str, block: int, incentive: float, emission: float) -> "ModelData":
        """Build an instance from the compressed ``namespace:name:commit:hash`` string.

        Args:
            uid: UID of the submitter.
            hotkey: Hotkey of the submitter.
            cs: Colon-separated string; only the first four tokens are used.
                A ``commit`` or ``hash`` token equal to ``"None"`` becomes ``None``.
            block: Block associated with the commitment.
            incentive: Incentive value for the UID.
            emission: Emission value for the UID.

        Returns:
            A new instance of ``cls`` (so subclasses get their own type back).

        Raises:
            IndexError: If ``cs`` contains fewer than four tokens.
        """
        tokens = cs.split(":")

        def _none_if_placeholder(token: str) -> typing.Optional[str]:
            # The literal text "None" stands for a missing value.
            return None if token == "None" else token

        return cls(
            uid=uid,
            hotkey=hotkey,
            namespace=tokens[0],
            name=tokens[1],
            commit=_none_if_placeholder(tokens[2]),
            hash=_none_if_placeholder(tokens[3]),
            block=block,
            incentive=incentive,
            emission=emission,
        )
def get_tao_price() -> float:
    """Return the last traded price for the TAO-USDT symbol from KuCoin's market-stats API.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        KeyError: If the JSON payload lacks ``data`` / ``last``.
    """
    response = requests.get(
        "https://api.kucoin.com/api/v1/market/stats?symbol=TAO-USDT",
        # Without a timeout requests waits forever, which would hang start-up.
        timeout=10,
    )
    # Fail with a clear HTTP error instead of an opaque JSON/KeyError later.
    response.raise_for_status()
    return float(response.json()["data"]["last"])
def print_validator_weights(metagraph: bt.metagraph):
    """Print the positive weights set by every UID that has positive validator trust.

    For each such UID a ``uid: <uid>`` line is printed, followed by one
    ``<other uid> = <weight>`` line per other UID whose weight, rounded to
    four decimals, is greater than zero.

    Args:
        metagraph: Metagraph exposing ``uids``, ``validator_trust`` and ``weights``.
    """
    # The UID list is loop-invariant; materialize it once instead of once per validator.
    uids = metagraph.uids.tolist()
    for uid in uids:
        # Written as `not (x > 0)` so a NaN trust value is skipped, as before.
        if not (metagraph.validator_trust[uid].item() > 0):
            continue
        print(f"uid: {uid}")
        weight_row = metagraph.weights[uid]
        for ouid in uids:
            if ouid == uid:
                continue
            weight = round(weight_row[ouid].item(), 4)
            if weight > 0:
                print(f" {ouid} = {weight}")
def get_subnet_data(metagraph: bt.metagraph) -> typing.List[ModelData]:
    """Collect the on-chain model commitment of every UID in the metagraph.

    UIDs without commitment metadata are skipped, and so are UIDs whose
    commitment cannot be decoded or parsed, so a single malformed
    submission does not take the whole leaderboard down.

    Args:
        metagraph: Metagraph to read UIDs, hotkeys, incentive and emission from.

    Returns:
        One ``ModelData`` per UID with a parseable commitment, in UID
        iteration order.
    """
    result = []
    for uid in metagraph.uids.tolist():
        hotkey = metagraph.hotkeys[uid]
        metadata = get_metadata(subtensor, metagraph.netuid, hotkey)
        if not metadata:
            continue

        commitment = metadata["info"]["fields"][0]
        # Take the first value of the commitment mapping and drop its first
        # two characters (presumably a "0x" prefix — confirm) before decoding.
        hex_data = next(iter(commitment.values()))[2:]
        block = metadata["block"]
        incentive = metagraph.incentive[uid].nan_to_num().item()
        emission = metagraph.emission[uid].nan_to_num().item() * 20  # convert to daily TAO

        try:
            chain_str = bytes.fromhex(hex_data).decode()
            model_data = ModelData.from_compressed_str(uid, hotkey, chain_str, block, incentive, emission)
        except (ValueError, IndexError):
            # ValueError: invalid hex or non-UTF-8 bytes (UnicodeDecodeError is a subclass).
            # IndexError: fewer than four ":"-separated tokens.
            continue
        result.append(model_data)
    return result
def get_avg_loss(uids: typing.List[int]) -> typing.Dict[int, float]:
    """Return the mean of the most recent logged loss values for each UID.

    Runs of ``VALIDATOR_WANDB_PROJECT`` are visited in the reverse of the
    order the W&B API returns them. For every UID, the first visited run
    whose history has a ``uid_data.<uid>`` column with at least one non-NaN
    value supplies the result: the mean of its last ``MAX_AVG_LOSS_POINTS``
    non-NaN values.

    Args:
        uids: UIDs to look up. Duplicates are tolerated.

    Returns:
        Mapping of UID to average loss. UIDs with no usable data in any run
        are absent.
    """
    result: typing.Dict[int, float] = {}
    # Track outstanding UIDs as a set: comparing len(result) with len(uids)
    # never matches when uids has duplicates, which disabled the early exit.
    pending = set(uids)
    if not pending:
        return result

    api = wandb.Api()
    runs = list(api.runs(VALIDATOR_WANDB_PROJECT))
    for run in reversed(runs):
        history = run.history()
        # Iterate the caller's list (not the set) so insertion order is unchanged.
        for uid in uids:
            if uid not in pending:
                continue
            key = f"uid_data.{uid}"
            if key not in history:
                continue
            data = [x for x in history[key] if not math.isnan(x)][-MAX_AVG_LOSS_POINTS:]
            if data:
                result[uid] = statistics.fmean(data)
                pending.discard(uid)
        if not pending:
            # Every UID resolved; no need to download further run histories.
            break
    return result
# Data for the page is fetched once at start-up.
tao_price = get_tao_price()
# Despite the name this is a plain list of ModelData, highest incentive first.
leaderboard_df = sorted(
    get_subnet_data(metagraph),
    key=lambda entry: entry.incentive,
    reverse=True,
)
# Average loss per UID; UIDs without logged data are absent from the mapping.
losses = get_avg_loss([entry.uid for entry in leaderboard_df])
demo = gr.Blocks()
with demo:
    # Static copy at the top of the page.
    gr.HTML(TITLE)
    gr.HTML(IMAGE)
    gr.HTML(HEADER)
    gr.HTML(DETAILS)

    # One row per model, in leaderboard_df order:
    # markdown link, daily rewards, average loss, UID.
    value = [
        [
            f'[{c.namespace}/{c.name}](https://huggingface.co/{c.namespace}/{c.name})',
            # Daily emission in USD with the TAO amount in parentheses.
            # "\u03c4" is the Greek letter tau (the TAO symbol); the escape keeps
            # it from being mangled again by a wrong file encoding.
            f'${round(c.emission * tao_price, 2):,} (\u03c4{round(c.emission, 2):,})',
            # Empty cell when no loss was found for this UID.
            f'{round(losses[c.uid], 4) if c.uid in losses else ""}',
            c.uid,
        ]
        for c in leaderboard_df
    ]
    # NOTE(review): the "Rewards / Day" and "Last Average Loss" cells are
    # strings although their datatype is declared "number" — confirm whether
    # "str" would be more appropriate for the installed Gradio version.
    leaderboard_table = gr.components.Dataframe(
        value=value,
        headers=["Name", "Rewards / Day", "Last Average Loss", "UID",],
        datatype=["markdown", "number", "number", "number"],
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
    )
def restart_space():
    """Ask the Hugging Face Hub to restart the Space named by ``REPO_ID``, authenticating with ``H4_TOKEN``."""
    API.restart_space(token=H4_TOKEN, repo_id=REPO_ID)
# The table data is computed once at import time, so the Space is restarted
# on a timer (presumably so a fresh start re-fetches everything — confirm).
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=15 * 60)  # restart every 15 minutes
scheduler.start()
# Serve the UI.
demo.launch()