emozilla committed on
Commit
16cb654
·
1 Parent(s): 5efa315

update leaderboard

Browse files
Files changed (1) hide show
  1. app.py +96 -29
app.py CHANGED
@@ -7,7 +7,7 @@ import requests
7
  import wandb
8
  import math
9
  import os
10
- import statistics
11
  import time
12
  from dotenv import load_dotenv
13
  from huggingface_hub import HfApi
@@ -19,14 +19,19 @@ FONT = """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="styles
19
  TITLE = """<h1 align="center" id="space-title" class="typewriter">Subnet 6 Leaderboard</h1>"""
20
  IMAGE = """<a href="https://discord.gg/jqVphNsB4H" target="_blank"><img src="https://i.ibb.co/88wyVQ7/nousgirl.png" alt="nousgirl" style="margin: auto; width: 20%; border: 0;" /></a>"""
21
  HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/NousResearch/finetuning-subnet" target="_blank">Subnet 6</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that incentivizes the creation of the best open models by evaluating submissions on a constant stream of newly generated synthetic GPT-4 data. The models with the best head-to-head loss on the evaluation data receive a steady emission of TAO.</h3>"""
22
- DETAILS = """<b>Name</b> is the 🤗 Hugging Face model name (click to go to the model card). <b>Rewards / Day</b> are the expected rewards per day for each model. <b>Last Average Loss</b> is the last loss value on the evaluation data for the model as calculated by a validator (lower is better). <b>UID</b> is the Bittensor user id of the submitter. <b>Block</b> is the Bittensor block that the model was submitted in. More stats on <a href="https://taostats.io/subnets/netuid-6/" target="_blank">taostats</a>."""
 
23
  VALIDATOR_WANDB_PROJECT = os.environ["VALIDATOR_WANDB_PROJECT"]
24
  H4_TOKEN = os.environ.get("H4_TOKEN", None)
25
  API = HfApi(token=H4_TOKEN)
26
  REPO_ID = "NousResearch/finetuning_subnet_leaderboard"
27
- MAX_AVG_LOSS_POINTS = 5
28
  METAGRAPH_RETRIES = 5
29
  METAGRAPH_DELAY_SECS = 3
 
 
 
 
30
 
31
  def get_subtensor_and_metagraph() -> typing.Tuple[bt.subtensor, bt.metagraph]:
32
  for i in range(0, METAGRAPH_RETRIES):
@@ -113,10 +118,25 @@ def get_subnet_data(subtensor: bt.subtensor, metagraph: bt.metagraph) -> typing.
113
  result.append(model_data)
114
  return result
115
 
116
- def get_avg_loss(uids: typing.List[int]) -> typing.Dict[int, float]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  api = wandb.Api()
118
  runs = list(api.runs(VALIDATOR_WANDB_PROJECT))
119
- runs.reverse()
120
 
121
  result = {}
122
  for run in runs:
@@ -124,50 +144,97 @@ def get_avg_loss(uids: typing.List[int]) -> typing.Dict[int, float]:
124
  for uid in uids:
125
  if uid in result.keys():
126
  continue
127
- key = f"uid_data.{uid}"
128
- if key in history:
129
- data = [float(x) for x in list(history[key]) if (isinstance(x, float) and not math.isnan(x)) or isinstance(x, int) ][-MAX_AVG_LOSS_POINTS:]
130
- if len(data) > 0:
131
- result[uid] = statistics.fmean(data)
 
 
 
 
132
  if len(result.keys()) == len(uids):
133
  break
134
  return result
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  subtensor, metagraph = get_subtensor_and_metagraph()
 
137
 
138
  tao_price = get_tao_price()
139
 
140
  leaderboard_df = get_subnet_data(subtensor, metagraph)
141
  leaderboard_df.sort(key=lambda x: x.incentive, reverse=True)
142
 
143
- losses = get_avg_loss([x.uid for x in leaderboard_df])
144
 
145
- demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
146
- with demo:
147
- gr.HTML(FONT)
148
- gr.HTML(TITLE)
149
- gr.HTML(IMAGE)
150
- gr.HTML(HEADER)
151
- gr.HTML(DETAILS)
 
 
152
 
 
 
 
 
 
 
153
  value = [
154
  [
155
  f'[{c.namespace}/{c.name}](https://huggingface.co/{c.namespace}/{c.name})',
156
- f'${round(c.emission * tao_price, 2):,} (τ{round(c.emission, 2):,})',
157
- f'{round(losses[c.uid], 4) if c.uid in losses.keys() else ""}',
158
  c.uid,
159
  c.block
160
- ] for c in leaderboard_df
161
  ]
162
- value = [x for x in value if x[2] != ""] # filter out anything without a loss
163
- leaderboard_table = gr.components.Dataframe(
164
- value=value,
165
- headers=["Name", "Rewards / Day", "Last Average Loss", "UID", "Block"],
166
- datatype=["markdown", "str", "number", "number", "number"],
167
- elem_id="leaderboard-table",
168
- interactive=False,
169
- visible=True,
 
 
 
 
 
 
170
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  def restart_space():
173
  API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
 
7
  import wandb
8
  import math
9
  import os
10
+ import datetime
11
  import time
12
  from dotenv import load_dotenv
13
  from huggingface_hub import HfApi
 
19
  TITLE = """<h1 align="center" id="space-title" class="typewriter">Subnet 6 Leaderboard</h1>"""
20
  IMAGE = """<a href="https://discord.gg/jqVphNsB4H" target="_blank"><img src="https://i.ibb.co/88wyVQ7/nousgirl.png" alt="nousgirl" style="margin: auto; width: 20%; border: 0;" /></a>"""
21
  HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/NousResearch/finetuning-subnet" target="_blank">Subnet 6</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that incentivizes the creation of the best open models by evaluating submissions on a constant stream of newly generated synthetic GPT-4 data. The models with the best head-to-head loss on the evaluation data receive a steady emission of TAO.</h3>"""
22
+ EVALUATION_DETAILS = """<b>Name</b> is the 🤗 Hugging Face model name (click to go to the model card). <b>Rewards / Day</b> are the expected rewards per day for each model. <b>Last Average Loss</b> is the last loss value on the evaluation data for the model as calculated by a validator (lower is better). <b>UID</b> is the Bittensor user id of the submitter. <b>Block</b> is the Bittensor block that the model was submitted in. More stats on <a href="https://taostats.io/subnets/netuid-6/" target="_blank">taostats</a>."""
23
+ EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by a validator run by Nous Research</h3>"""
24
  VALIDATOR_WANDB_PROJECT = os.environ["VALIDATOR_WANDB_PROJECT"]
25
  H4_TOKEN = os.environ.get("H4_TOKEN", None)
26
  API = HfApi(token=H4_TOKEN)
27
  REPO_ID = "NousResearch/finetuning_subnet_leaderboard"
28
+ MAX_AVG_LOSS_POINTS = 1
29
  METAGRAPH_RETRIES = 5
30
  METAGRAPH_DELAY_SECS = 3
31
+ NETUID = 6
32
+ SUBNET_START_BLOCK = 2225782
33
+ MIN_INCENTIVE_THRESHOLD = 0.01
34
+ SECONDS_PER_BLOCK = 12
35
 
36
  def get_subtensor_and_metagraph() -> typing.Tuple[bt.subtensor, bt.metagraph]:
37
  for i in range(0, METAGRAPH_RETRIES):
 
118
  result.append(model_data)
119
  return result
120
 
121
+ def floatable(x) -> bool:
122
+ return (isinstance(x, float) and not math.isnan(x) and not math.isinf(x)) or isinstance(x, int)
123
+
124
+ def get_float_score(key: str, history) -> typing.Tuple[typing.Optional[float], bool]:
125
+ if key in history:
126
+ data = list(history[key])
127
+ if len(data) > 0:
128
+ if floatable(data[-1]):
129
+ return float(data[-1]), True
130
+ else:
131
+ data = [float(x) for x in data if floatable(x)]
132
+ if len(data) > 0:
133
+ return float(data[-1]), False
134
+ return None, False
135
+
136
+ def get_scores(uids: typing.List[int]) -> typing.Dict[int, typing.Dict[str, typing.Optional[float]]]:
137
  api = wandb.Api()
138
  runs = list(api.runs(VALIDATOR_WANDB_PROJECT))
139
+ print(f"Top validator run: {runs[0].name}")
140
 
141
  result = {}
142
  for run in runs:
 
144
  for uid in uids:
145
  if uid in result.keys():
146
  continue
147
+ avg_loss, avg_loss_fresh = get_float_score(f"uid_data.{uid}", history)
148
+ win_rate, win_rate_fresh = get_float_score(f"win_rate_data.{uid}", history)
149
+ win_total, win_total_fresh = get_float_score(f"win_total_data.{uid}", history)
150
+ result[uid] = {
151
+ "avg_loss": avg_loss,
152
+ "win_rate": win_rate,
153
+ "win_total": win_total,
154
+ "fresh": avg_loss_fresh and win_rate_fresh and win_total_fresh
155
+ }
156
  if len(result.keys()) == len(uids):
157
  break
158
  return result
159
 
160
+ def format_score(uid, scores, key) -> typing.Optional[float]:
161
+ if uid in scores:
162
+ if key in scores[uid]:
163
+ point = scores[uid][key]
164
+ if floatable(point):
165
+ return round(scores[uid][key], 4)
166
+ return None
167
+
168
+ def next_tempo(start_block, tempo, block):
169
+ start_num = start_block + tempo
170
+ intervals = (block - start_num) // tempo
171
+ nearest_num = start_num + ((intervals + 1) * tempo)
172
+ return nearest_num
173
+
174
  subtensor, metagraph = get_subtensor_and_metagraph()
175
+ print_validator_weights(metagraph)
176
 
177
  tao_price = get_tao_price()
178
 
179
  leaderboard_df = get_subnet_data(subtensor, metagraph)
180
  leaderboard_df.sort(key=lambda x: x.incentive, reverse=True)
181
 
182
+ scores = get_scores([x.uid for x in leaderboard_df])
183
 
184
+ current_block = metagraph.block.item()
185
+ next_update = next_tempo(
186
+ SUBNET_START_BLOCK,
187
+ subtensor.get_subnet_hyperparameters(NETUID).tempo,
188
+ current_block
189
+ )
190
+ blocks_to_go = next_update - current_block
191
+ current_time = datetime.datetime.now()
192
+ next_update_time = current_time + datetime.timedelta(seconds=blocks_to_go * SECONDS_PER_BLOCK)
193
 
194
+ def get_next_update():
195
+ now = datetime.datetime.now()
196
+ delta = next_update_time - now
197
+ return f"""<div align="center" style="font-size: larger;">Next reward update: <b>{blocks_to_go}</b> blocks (~{int(delta.total_seconds() // 60)} minutes)</div>"""
198
+
199
+ def leaderboard_data(show_stale: bool):
200
  value = [
201
  [
202
  f'[{c.namespace}/{c.name}](https://huggingface.co/{c.namespace}/{c.name})',
203
+ format_score(c.uid, scores, "win_rate"),
204
+ format_score(c.uid, scores, "avg_loss"),
205
  c.uid,
206
  c.block
207
+ ] for c in leaderboard_df if scores[c.uid]["fresh"] or show_stale
208
  ]
209
+ return value
210
+
211
+ demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
212
+ with demo:
213
+ gr.HTML(FONT)
214
+ gr.HTML(TITLE)
215
+ gr.HTML(IMAGE)
216
+ gr.HTML(HEADER)
217
+
218
+ gr.HTML(value=get_next_update())
219
+
220
+ gr.Label(
221
+ value={ f"{c.namespace}/{c.name} · ${round(c.emission * tao_price, 2):,} (τ{round(c.emission, 2):,})": c.incentive for c in leaderboard_df},
222
+ num_top_classes=10,
223
  )
224
+
225
+ with gr.Accordion("Evaluation Stats"):
226
+ gr.HTML(EVALUATION_HEADER)
227
+ show_stale = gr.Checkbox(label="Show Stale", interactive=True)
228
+ leaderboard_table = gr.components.Dataframe(
229
+ value=leaderboard_data(show_stale.value),
230
+ headers=["Name", "Win Rate", "Average Loss", "UID", "Block"],
231
+ datatype=["markdown", "number", "number", "number", "number", "number"],
232
+ elem_id="leaderboard-table",
233
+ interactive=False,
234
+ visible=True,
235
+ )
236
+ gr.HTML(EVALUATION_DETAILS)
237
+ show_stale.change(leaderboard_data, [show_stale], leaderboard_table)
238
 
239
  def restart_space():
240
  API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)