CryptAL committed on
Commit
4280433
·
1 Parent(s): 1eeabae

Added app code from RaoFoundation's pretraining-leaderboard Space

Browse files
Files changed (1) hide show
  1. app.py +437 -4
app.py CHANGED
@@ -1,7 +1,440 @@
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return name
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Code adapted from: https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py
import argparse
import datetime
import functools
import json
import math
import os
import time
import traceback
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

import bittensor as bt
import gradio as gr
import pandas as pd
import wandb
from apscheduler.schedulers.background import BackgroundScheduler
from bittensor.extrinsics.serving import get_metadata
from dotenv import load_dotenv
from huggingface_hub import HfApi
21
 
22
+ load_dotenv()
 
23
 
24
+ FONT = (
25
+ """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
26
+ )
27
+ TITLE = """<h1 align="center" id="space-title" class="typewriter">Subnet 9 Leaderboard</h1>"""
28
+ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/pretraining" target="_blank">Subnet 9</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that rewards miners for producing pretrained Foundation-Models on the <a href="https://huggingface.co/datasets/tiiuae/falcon-refinedweb" target="_blank">Falcon Refined Web dataset</a>. It acts like a continuous benchmark whereby miners are rewarded for attaining the best losses on randomly sampled pages of Falcon.<br/>The models with the best head-to-head loss on the evaluation data receive a steady emission of TAO.</h3>"""
29
+ EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
30
+ EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
31
+ VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
32
+ BENCHMARK_WANDB_PROJECT = "raofoundation/pretraining-leaderboard-data"
33
+ H4_TOKEN = os.environ.get("H4_TOKEN", None)
34
+ API = HfApi(token=H4_TOKEN)
35
+ WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
36
+ SUBTENSOR_ENDPOINT=os.environ.get("SUBTENSOR_ENDPOINT", None)
37
+ REPO_ID = "RaoFoundation/pretraining-leaderboard"
38
+ MAX_AVG_LOSS_POINTS = 1
39
+ RETRIES = 5
40
+ DELAY_SECS = 3
41
+ NETUID = 9
42
+ SECONDS_PER_BLOCK = 12
43
+
44
+
45
@dataclass
class ModelData:
    """Metadata for a single miner-submitted model on the subnet."""

    uid: int
    hotkey: str
    namespace: str
    name: str
    # Parsed from the on-chain commitment; the literal token "None" is
    # normalized to a real None by from_compressed_str.
    commit: Optional[str]
    hash: Optional[str]
    block: int
    incentive: float
    emission: float

    @classmethod
    def from_compressed_str(
        cls,
        uid: int,
        hotkey: str,
        cs: str,
        block: int,
        incentive: float,
        emission: float,
    ) -> "ModelData":
        """Returns an instance of this class from a compressed string representation.

        The compressed string is colon-separated: "namespace:name:commit:hash".
        """
        tokens = cs.split(":")
        return cls(
            uid=uid,
            hotkey=hotkey,
            namespace=tokens[0],
            name=tokens[1],
            commit=tokens[2] if tokens[2] != "None" else None,
            hash=tokens[3] if tokens[3] != "None" else None,
            block=block,
            incentive=incentive,
            emission=emission,
        )
80
+
81
+
82
def run_with_retries(func, *args, **kwargs):
    """Calls func(*args, **kwargs), retrying up to RETRIES times.

    Sleeps DELAY_SECS between attempts; the final failure re-raises the
    original exception.
    """
    # `(Exception, RuntimeError)` was redundant — RuntimeError is an Exception.
    for attempt in range(RETRIES):
        try:
            return func(*args, **kwargs)
        except Exception:
            if attempt == RETRIES - 1:
                raise
            time.sleep(DELAY_SECS)
    # Defensive: only reachable if RETRIES were misconfigured to <= 0.
    raise RuntimeError("Should never happen")
91
+
92
+
93
def get_subtensor_and_metagraph() -> Tuple[bt.subtensor, bt.metagraph]:
    """Connects to the chain and returns (subtensor, metagraph), with retries."""

    def _connect() -> Tuple[bt.subtensor, bt.metagraph]:
        # Honor an explicit endpoint override; otherwise use the public network.
        if not SUBTENSOR_ENDPOINT:
            subtensor = bt.subtensor("finney")
        else:
            parser = argparse.ArgumentParser()
            bt.subtensor.add_args(parser)
            subtensor = bt.subtensor(
                config=bt.config(
                    parser=parser,
                    args=["--subtensor.chain_endpoint", SUBTENSOR_ENDPOINT],
                )
            )
        metagraph = subtensor.metagraph(NETUID, lite=False)
        return subtensor, metagraph

    return run_with_retries(_connect)
105
+
106
+
107
def get_validator_weights(
    metagraph: bt.metagraph,
) -> Dict[int, Tuple[float, int, Dict[int, float]]]:
    """Returns a dictionary of validator UIDs to (vtrust, stake, {uid: weight})."""
    uids = metagraph.uids.tolist()
    validators = {}
    for uid in uids:
        vtrust = metagraph.validator_trust[uid].item()
        # Only UIDs with positive validator trust count as validators.
        if vtrust <= 0:
            continue
        weights: Dict[int, float] = {}
        for other_uid in uids:
            if other_uid == uid:
                continue
            weight = round(metagraph.weights[uid][other_uid].item(), 4)
            if weight > 0:
                weights[other_uid] = weight
        validators[uid] = (vtrust, metagraph.S[uid].item(), weights)
    return validators
123
+
124
+
125
def get_subnet_data(
    subtensor: bt.subtensor, metagraph: bt.metagraph
) -> List[ModelData]:
    """Reads each miner's on-chain commitment and returns the parsed ModelData list.

    UIDs with missing metadata or an unparsable commitment are skipped.
    """
    result = []
    for uid in metagraph.uids.tolist():
        hotkey = metagraph.hotkeys[uid]
        metadata = None
        try:
            metadata = run_with_retries(
                functools.partial(get_metadata, subtensor, metagraph.netuid, hotkey)
            )
        # Was a bare `except:`; Exception avoids swallowing SystemExit/KeyboardInterrupt.
        except Exception:
            print(f"Failed to get metadata for UID {uid}: {traceback.format_exc()}")

        if not metadata:
            continue

        commitment = metadata["info"]["fields"][0]
        # The commitment is a single-entry dict; its value is "0x"-prefixed hex.
        hex_data = commitment[next(iter(commitment))][2:]
        chain_str = bytes.fromhex(hex_data).decode()
        block = metadata["block"]
        incentive = metagraph.incentive[uid].nan_to_num().item()
        emission = (
            metagraph.emission[uid].nan_to_num().item() * 20
        )  # convert to daily TAO

        try:
            model_data = ModelData.from_compressed_str(
                uid, hotkey, chain_str, block, incentive, emission
            )
        except Exception:
            # Malformed commitment string: skip this miner.
            continue

        result.append(model_data)
    return result
161
+
162
+
163
def is_floatable(x) -> bool:
    """True for ints and finite floats (NaN/inf excluded); False otherwise."""
    if isinstance(x, int):
        return True
    return isinstance(x, float) and not math.isnan(x) and not math.isinf(x)
167
+
168
+
169
def get_wandb_runs(project: str, filters: Dict[str, Any]) -> List:
    """Get the latest runs from Wandb, retrying infinitely until we get them."""
    while True:
        api = wandb.Api(api_key=WANDB_TOKEN)
        runs = list(api.runs(project, filters=filters))
        if runs:
            return runs
        # WandDB API is quite unreliable. Wait another minute and try again.
        print("Failed to get runs from Wandb. Trying again in 60 seconds.")
        time.sleep(60)
184
+
185
+
186
def get_scores(
    uids: List[int],
    wandb_runs: List,
) -> Dict[int, Dict[str, Optional[float]]]:
    """Extracts per-UID evaluation stats from validator Wandb runs.

    Args:
        uids: miner UIDs to look up.
        wandb_runs: validator runs, expected newest-first (asserted below).

    Returns:
        {uid: {"avg_loss", "win_rate", "win_total", "weight", "fresh"}} where
        "fresh" is True only for stats taken from the newest run.
    """
    result = {}
    previous_timestamp = None
    # Iterate through the runs until we've processed all the uids.
    for i, run in enumerate(wandb_runs):
        # Idiomatic `not in` (was `not "x" in ...`).
        if "original_format_json" not in run.summary:
            continue
        data = json.loads(run.summary["original_format_json"])
        all_uid_data = data["uid_data"]
        timestamp = data["timestamp"]

        # Make sure runs are indeed in descending time order.
        assert (
            previous_timestamp is None or timestamp < previous_timestamp
        ), f"Timestamps are not in descending order: {timestamp} >= {previous_timestamp}"
        previous_timestamp = timestamp

        for uid in uids:
            if uid in result:
                continue
            if str(uid) in all_uid_data:
                uid_data = all_uid_data[str(uid)]
                # Only the most recent run is fresh.
                is_fresh = i == 0
                result[uid] = {
                    "avg_loss": uid_data.get("average_loss", None),
                    "win_rate": uid_data.get("win_rate", None),
                    "win_total": uid_data.get("win_total", None),
                    "weight": uid_data.get("weight", None),
                    "fresh": is_fresh,
                }
        if len(result) == len(uids):
            break
    return result
223
+
224
+
225
def get_losses_over_time(wandb_runs: List) -> pd.DataFrame:
    """Returns a dataframe of the best average model loss over time."""
    rows = []

    for run in wandb_runs:
        if "original_format_json" not in run.summary:
            continue
        data = json.loads(run.summary["original_format_json"])
        timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
        # Losses outside these bounds before the cutoffs are artifacts of an
        # exploit / a period when validators lost the best model — filter them.
        exploit_cutoff = datetime.datetime(2024, 2, 12)
        lost_model_cutoff = datetime.datetime(2024, 3, 27)

        best_loss = math.inf
        for uid_data in data["uid_data"].values():
            loss = uid_data.get("average_loss", math.inf)
            if loss >= best_loss:
                continue
            if loss <= 2.5 and timestamp <= exploit_cutoff:
                continue
            if loss >= 5 and timestamp <= lost_model_cutoff:
                continue
            best_loss = loss
        if best_loss != math.inf:
            rows.append((timestamp, best_loss))

    return pd.DataFrame(rows, columns=["timestamp", "best_loss"])
247
+
248
+
249
def format_score(uid: int, scores, key) -> Optional[float]:
    """Looks up scores[uid][key] and rounds it to 4 decimals.

    Returns None when the uid or key is missing, or the value is not a finite number.
    """
    try:
        point = scores[uid][key]
    except KeyError:
        return None
    if is_floatable(point):
        return round(point, 4)
    return None
256
+
257
+
258
def next_epoch(subtensor: bt.subtensor, block: int) -> int:
    """Predicts the block at which the subnet's next epoch begins."""
    tempo = subtensor.get_subnet_hyperparameters(NETUID).tempo
    elapsed = subtensor.blocks_since_epoch(NETUID, block)
    return block + tempo - elapsed
264
+
265
+
266
def get_next_update_div(current_block: int, next_update_block: int) -> str:
    """Renders an HTML banner estimating when the next reward update lands."""
    blocks_to_go = next_update_block - current_block
    now = datetime.datetime.now()
    eta = now + datetime.timedelta(seconds=blocks_to_go * SECONDS_PER_BLOCK)
    remaining = eta - now
    minutes = int(remaining.total_seconds() // 60)
    return f"""<div align="center" style="font-size: larger;">Next reward update: <b>{blocks_to_go}</b> blocks (~{minutes} minutes)</div>"""
274
+
275
+
276
def get_last_updated_div() -> str:
    """Renders an HTML footer with the current UTC time."""
    stamp = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
    return f"""<div>Last Updated: {stamp} (UTC)</div>"""
278
+
279
+
280
def leaderboard_data(
    leaderboard: List[ModelData],
    scores: Dict[int, Dict[str, Optional[float]]],
    show_stale: bool,
) -> List[List[Any]]:
    """Returns the leaderboard data, based on models data and UID scores."""
    rows = []
    for model in leaderboard:
        # Hide models whose stats did not come from the latest run unless asked.
        if not show_stale and not (model.uid in scores and scores[model.uid]["fresh"]):
            continue
        # NOTE(review): model.commit may be None (see ModelData.from_compressed_str);
        # slicing would raise TypeError here — confirm upstream guarantees a commit.
        link = f"[{model.namespace}/{model.name} ({model.commit[0:8]})](https://huggingface.co/{model.namespace}/{model.name}/commit/{model.commit})"
        rows.append(
            [
                link,
                format_score(model.uid, scores, "win_rate"),
                format_score(model.uid, scores, "avg_loss"),
                format_score(model.uid, scores, "weight"),
                model.uid,
                model.block,
            ]
        )
    return rows
298
+
299
def get_benchmarks() -> Tuple[Optional[pd.DataFrame], Optional[datetime.datetime]]:
    """Returns the latest benchmarks and the time they were run.

    Scans the benchmark Wandb runs (as returned by get_wandb_runs) for the most
    recent run with a logged "benchmarks" artifact table. Returns (None, None)
    when no run has one.
    """
    runs = get_wandb_runs(project=BENCHMARK_WANDB_PROJECT, filters=None)
    for run in runs:
        artifacts = list(run.logged_artifacts())
        if artifacts:
            # Only the last-logged artifact of each run is considered.
            table = artifacts[-1].get("benchmarks")
            if table:
                # Start time is parsed with the "%Y-%m-%dT%H:%M:%S.%f" layout.
                return table.get_dataframe(), datetime.datetime.strptime(run.metadata["startedAt"], "%Y-%m-%dT%H:%M:%S.%f")
    bt.logging.error("Failed to get benchmarks from Wandb.")
    return None, None
310
+
311
+
312
def restart_space():
    """Restarts this Hugging Face Space (scheduled periodically from main)."""
    API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
314
+
315
+
316
def main():
    """Fetches chain and Wandb data (retrying until complete), builds the Gradio
    dashboard, schedules periodic Space restarts, and serves the app."""
    # To avoid leaderboard failures, infinitely try until we get all data
    # needed to populate the dashboard
    while True:
        try:
            subtensor, metagraph = get_subtensor_and_metagraph()

            model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
            model_data.sort(key=lambda x: x.incentive, reverse=True)

            # Stats come from the validator run with config.uid 238
            # (presumably the Opentensor validator — confirm).
            vali_runs = get_wandb_runs(project=VALIDATOR_WANDB_PROJECT, filters={"config.type": "validator", "config.uid": 238})

            scores = get_scores([x.uid for x in model_data], vali_runs)

            # TODO: Re-enable once ""SubtensorModule.BlocksSinceEpoch" not found" issue is resolved.
            # current_block = metagraph.block.item()
            # next_epoch_block = next_epoch(subtensor, current_block)

            validator_df = get_validator_weights(metagraph)
            # NOTE(review): weight_keys is computed but never used below.
            weight_keys = set()
            for uid, stats in validator_df.items():
                weight_keys.update(stats[-1].keys())

            benchmarks, benchmark_timestamp = get_benchmarks()
            break
        except Exception as e:
            print(f"Failed to get data: {e}")
            time.sleep(30)

    demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
    with demo:
        gr.HTML(FONT)
        gr.HTML(TITLE)
        gr.HTML(HEADER)

        # TODO: Re-enable once ""SubtensorModule.BlocksSinceEpoch" not found" issue is resolved.
        # gr.HTML(value=get_next_update_div(current_block, next_epoch_block))

        # Incentive distribution across the top models.
        gr.Label(
            value={
                f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
                for c in model_data
                if c.incentive
            },
            num_top_classes=10,
        )

        if benchmarks is not None:
            with gr.Accordion("Top Model Benchmarks"):
                gr.components.Dataframe(benchmarks)
                gr.HTML("""<div>PPL computed using a stride of 512. See <a href='https://github.com/RaoFoundation/pretraining/blob/dev/scripts/run_benchmarks.py'>here</a> for the full code.</div>""")
                gr.HTML(f"""<div>Last Updated: {benchmark_timestamp.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>""")

        with gr.Accordion("Evaluation Stats"):
            gr.HTML(EVALUATION_HEADER)
            show_stale = gr.Checkbox(label="Show Stale", interactive=True)
            leaderboard_table = gr.components.Dataframe(
                value=leaderboard_data(model_data, scores, show_stale.value),
                headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
                datatype=["markdown", "number", "number", "number", "number", "number"],
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )
            gr.HTML(EVALUATION_DETAILS)
            # Re-render the table when the "Show Stale" checkbox toggles.
            show_stale.change(
                lambda stale: leaderboard_data(model_data, scores, stale),
                inputs=[show_stale],
                outputs=leaderboard_table,
            )

            # NOTE(review): indentation was lost in the source; the loss plot is
            # placed inside the "Evaluation Stats" accordion — confirm placement.
            gr.LinePlot(
                get_losses_over_time(vali_runs),
                x="timestamp",
                x_title="Date",
                y="best_loss",
                y_title="Average Loss",
                tooltip="best_loss",
                interactive=True,
                visible=True,
                width=1024,
                title="Best Average Loss Over Time",
            )

        with gr.Accordion("Validator Stats"):
            # One row per validator (sorted by stake, descending): UID, stake,
            # vtrust, then that validator's weight on each incentivized model.
            gr.components.Dataframe(
                value=[
                    [uid, int(validator_df[uid][1]), round(validator_df[uid][0], 4)]
                    + [
                        validator_df[uid][-1].get(c.uid)
                        for c in model_data
                        if c.incentive
                    ]
                    for uid, _ in sorted(
                        zip(
                            validator_df.keys(),
                            [validator_df[x][1] for x in validator_df.keys()],
                        ),
                        key=lambda x: x[1],
                        reverse=True,
                    )
                ],
                headers=["UID", "Stake (τ)", "V-Trust"]
                + [
                    f"{c.namespace}/{c.name} ({c.commit[0:8]})"
                    for c in model_data
                    if c.incentive
                ],
                datatype=["number", "number", "number"]
                + ["number" for c in model_data if c.incentive],
                interactive=False,
                visible=True,
            )
        gr.HTML(value=get_last_updated_div())

    scheduler = BackgroundScheduler()
    scheduler.add_job(
        restart_space, "interval", seconds=60 * 30
    )  # restart every 30 minutes (60 * 30 seconds)
    scheduler.start()

    demo.launch()
438
+
439
+
440
# Standard script-entry guard: the dashboard only launches when run directly
# (as Hugging Face Spaces does), not when the module is imported.
if __name__ == "__main__":
    main()