steffenc committed
Commit e5a977e · Parent: 8810468

update scripts

Files changed (2):
  1. multigraph.py +30 -24
  2. multistats.py +3 -2
multigraph.py CHANGED
@@ -15,13 +15,13 @@ from meta_utils import load_metagraphs
 def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):
 
     if subtensor is None:
-        subtensor = bittensor.subtensor(network='finney')
-
-    try:
+        subtensor = bittensor.subtensor(network='archive')
+
+    try:
         metagraph = subtensor.metagraph(block=block, netuid=netuid, lite=lite)
         if difficulty:
             metagraph.difficulty = subtensor.difficulty(block=block, netuid=netuid)
-
+
         if not lite:
             if half:
                 metagraph.weights = torch.nn.Parameter(metagraph.weights.half(), requires_grad=False)
@@ -32,65 +32,71 @@ def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):
             pickle.dump(metagraph, f)
 
         return metagraph if return_graph else True
-
+
     except Exception as e:
         print(f'Error processing block {block}: {e}')
-
+
 
 def parse_arguments():
     parser = argparse.ArgumentParser(description='Process metagraphs for a given network.')
     parser.add_argument('--netuid', type=int, default=1, help='Network UID to use.')
+    parser.add_argument('--lite', action='store_true', help='Do not include weights.')
     parser.add_argument('--difficulty', action='store_true', help='Include difficulty in metagraph.')
     parser.add_argument('--prune_weights', action='store_true', help='Prune weights in metagraph.')
     parser.add_argument('--return_graph', action='store_true', help='Return metagraph instead of True.')
     parser.add_argument('--no_dataframe', action='store_true', help='Do not create dataframe.')
     parser.add_argument('--max_workers', type=int, default=32, help='Max workers to use.')
     parser.add_argument('--start_block', type=int, default=1_500_000, help='Start block.')
+    parser.add_argument('--num_blocks', type=int, default=0, help='Number of blocks.')
     parser.add_argument('--end_block', type=int, default=600_000, help='End block.')
     parser.add_argument('--step_size', type=int, default=100, help='Step size.')
+    parser.add_argument('--overwrite', action='store_true', help='Overwrite existing files')
     return parser.parse_args()
 
 if __name__ == '__main__':
 
-    subtensor = bittensor.subtensor(network='finney')
+    subtensor = bittensor.subtensor(network='archive')
     print(f'Current block: {subtensor.block}')
-
+
     args = parse_arguments()
-
+    print(args)
+
     netuid=args.netuid
+    lite=args.lite
     difficulty=args.difficulty
-    overwrite=False
-    return_graph=args.return_graph
-
+    return_graph=args.return_graph
+
     step_size = args.step_size
     start_block = args.start_block
     start_block = (min(subtensor.block, start_block)//step_size)*step_size # round to nearest step_size
-    end_block = args.end_block
+    if args.num_blocks:
+        end_block = start_block - int(args.num_blocks*step_size)
+    else:
+        end_block = args.end_block
+
     blocks = range(start_block, end_block, -step_size)
 
-    # only get weights for multiple of 500 blocks
-    lite=lambda x: x%500!=0
-
+
     max_workers = min(args.max_workers, len(blocks))
 
     datadir = f'data/metagraph/{netuid}'
     os.makedirs(datadir, exist_ok=True)
-    if not overwrite:
+    if not args.overwrite:
         blocks = [block for block in blocks if not os.path.exists(f'data/metagraph/{netuid}/{block}.pkl')]
 
     metagraphs = []
-
+
     if len(blocks)>0:
-
+
         print(f'Processing {len(blocks)} blocks from {blocks[0]}-{blocks[-1]} using {max_workers} workers.')
-
+
         with ProcessPoolExecutor(max_workers=max_workers) as executor:
             futures = [
-                executor.submit(process, block, lite=lite(block), netuid=netuid, difficulty=difficulty)
+                executor.submit(process, block, lite=args.lite, netuid=netuid, difficulty=difficulty)
                 for block in blocks
             ]
 
-        success = 0
+        success = 0
         with tqdm.tqdm(total=len(futures)) as pbar:
             for block, future in zip(blocks,futures):
                 try:
@@ -103,7 +109,7 @@ if __name__ == '__main__':
 
         if not success:
             raise ValueError('No blocks were successfully processed.')
-
+
         print(f'Processed {success} blocks.')
         if return_graph:
             for metagraph in metagraphs:
@@ -125,7 +131,7 @@ if __name__ == '__main__':
     if len(blocks)==0:
         print('No blocks to process.')
         sys.exit(0)
-
+
     df = load_metagraphs(blocks[0], blocks[-1], block_step=step_size, datadir=datadir)
    if df_loaded is not None:
         df = pd.concat([df, df_loaded], ignore_index=True)
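The --num_blocks branch added above derives end_block by stepping back from start_block, instead of relying on the fixed --end_block default. A minimal sketch of the arithmetic, with illustrative values rather than anything taken from the commit:

# Sketch of the new block-range logic; values are illustrative.
step_size = 100
start_block = (1_500_000 // step_size) * step_size     # round down to a step boundary, as in the script
num_blocks = 5
end_block = start_block - int(num_blocks * step_size)  # 1_499_500

# Walk backwards from the tip, one snapshot per step_size blocks.
blocks = range(start_block, end_block, -step_size)
print(list(blocks))  # [1500000, 1499900, 1499800, 1499700, 1499600]

Because range excludes its stop value, --num_blocks N yields exactly N snapshots.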
 
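A hypothetical invocation of the updated script, using only flags defined in parse_arguments() above (the values are illustrative):

python multigraph.py --netuid 1 --lite --start_block 1500000 --num_blocks 100 --step_size 100 --max_workers 32

Note that --lite now applies uniformly to every worker: the old behaviour of fetching full weights only for every 500th block (lite=lambda x: x%500!=0) is removed, so weights are fetched either for all sampled blocks or for none.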
multistats.py CHANGED
@@ -16,7 +16,7 @@ from IPython.display import display
 api= wandb.Api(timeout=60)
 wandb.login(anonymous="allow")
 
-def pull_wandb_runs(project='openvalidators', filters=None, min_steps=50, max_steps=100_000, ntop=10, netuid=None, summary_filters=None ):
+def pull_wandb_runs(project='opentensor-dev/openvalidators', filters=None, min_steps=50, max_steps=100_000, ntop=10, netuid=None, summary_filters=None ):
     # TODO: speed this up by storing older runs
 
     all_runs = api.runs(project, filters=filters)
@@ -129,7 +129,7 @@ def load_data(run_id, run_path=None, load=True, save=False, explode=True):
 
     # Clean and explode dataframe
     # overwrite object to free memory
-    float_cols = df.filter(regex='reward').columns
+    float_cols = df.filter(regex='reward|filter').columns
     df = explode_data(clean_data(df)).astype({c: float for c in float_cols}).fillna({c: 0 for c in float_cols})
 
     if save:
@@ -156,6 +156,7 @@ def calculate_stats(df_long, freq='H', save_path=None, ntop=3 ):
     run_id = df_long['run_id'].iloc[0]
     # print(f'Calculating stats for run {run_id!r} dataframe with shape {df_long.shape}')
 
+
     # Approximate number of tokens in each completion
     df_long['completion_num_tokens'] = (df_long['completions'].astype(str).str.split().str.len() / 0.75).round()
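Broadening the regex from 'reward' to 'reward|filter' widens the set of columns that load_data coerces to float and zero-fills. A minimal sketch of that coercion on a toy frame (the column names here are hypothetical, not taken from the repository):

import pandas as pd

# 'rewards' matches 'reward'; 'nsfw_filter' matches 'filter'.
df = pd.DataFrame({'completions': ['a b c'], 'rewards': [None], 'nsfw_filter': [None]})

float_cols = df.filter(regex='reward|filter').columns
df = df.astype({c: float for c in float_cols}).fillna({c: 0 for c in float_cols})
print(df.dtypes)  # rewards and nsfw_filter are now float64, with missing values filled as 0.0

The / 0.75 in calculate_stats reflects the common rule of thumb that one token is roughly 0.75 English words, so dividing the word count by 0.75 approximates the token count.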
 
 