steffenc committed on
Commit
08fa697
·
1 Parent(s): 0ffd9af

Fix dataframe construction bug and improve logging/tqdm outputs

Browse files
Files changed (2) hide show
  1. meta2frame.py +4 -4
  2. multigraph.py +5 -13
meta2frame.py CHANGED
@@ -21,8 +21,8 @@ def load_metagraphs(root_dir, netuid, block_min=0, block_max=3_000_000):
21
  print(f'Found {len(files)} metagraphs in {match_path}')
22
 
23
  valid_files = [path for path in files if block_min <= int(path.split('/')[-1].split('.')[0]) <= block_max]
24
- print(f'Found {len(valid_files)} valid metagraphs between {block_min} and {block_max}')
25
- for path in tqdm.tqdm(valid_files):
26
 
27
  with open(path, 'rb') as f:
28
  metagraph = pickle.load(f)
@@ -48,7 +48,7 @@ def block_to_time(blocks, subtensor=None):
48
 
49
  timestamps = {}
50
  unique_blocks = set(blocks)
51
- for block in tqdm.tqdm(unique_blocks):
52
  timestamps[block] = get_block_timestamp(block, subtensor)
53
 
54
  return blocks.map(timestamps).apply(pd.to_datetime, unit='ms')
@@ -59,7 +59,7 @@ def make_dataframe(netuid, root_dir=ROOT_DIR, cols=None, block_min=0, block_max=
59
  cols = ['stake','emission','trust','validator_trust','dividends','incentive','R', 'consensus','validator_permit']
60
  frames = []
61
  metagraphs = load_metagraphs(root_dir, netuid, block_min, block_max)
62
- print(f'Loaded {len(metagraphs)} metagraphs for netuid {netuid}')
63
  for m in metagraphs:
64
  frame = pd.DataFrame({k: getattr(m, k) for k in cols})
65
  frame['block'] = m.block.item()
 
21
  print(f'Found {len(files)} metagraphs in {match_path}')
22
 
23
  valid_files = [path for path in files if block_min <= int(path.split('/')[-1].split('.')[0]) <= block_max]
24
+ pbar = tqdm.tqdm(valid_files, desc=f'Loading {len(valid_files)} metagraph snapshots')
25
+ for path in pbar:
26
 
27
  with open(path, 'rb') as f:
28
  metagraph = pickle.load(f)
 
48
 
49
  timestamps = {}
50
  unique_blocks = set(blocks)
51
+ for block in tqdm.tqdm(unique_blocks, desc=f'Mapping {len(unique_blocks)} blocks to timestamps'):
52
  timestamps[block] = get_block_timestamp(block, subtensor)
53
 
54
  return blocks.map(timestamps).apply(pd.to_datetime, unit='ms')
 
59
  cols = ['stake','emission','trust','validator_trust','dividends','incentive','R', 'consensus','validator_permit']
60
  frames = []
61
  metagraphs = load_metagraphs(root_dir, netuid, block_min, block_max)
62
+
63
  for m in metagraphs:
64
  frame = pd.DataFrame({k: getattr(m, k) for k in cols})
65
  frame['block'] = m.block.item()
multigraph.py CHANGED
@@ -10,6 +10,7 @@ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
10
  import torch
11
  import bittensor
12
  from meta_utils import load_metagraphs
 
13
  #TODO: make line charts and other cool stuff for each metagraph snapshot
14
 
15
  def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):
@@ -128,18 +129,9 @@ if __name__ == '__main__':
128
 
129
  if not args.no_dataframe:
130
  save_path = f'data/metagraph/{netuid}/df.parquet'
131
- blocks = range(start_block, end_block, step_size)
132
- df_loaded = None
133
- if os.path.exists(save_path):
134
- df_loaded = pd.read_parquet(save_path)
135
- blocks = [block for block in blocks if block not in df_loaded.block.unique()]
136
- print(f'Loaded dataframe from {save_path!r}. {len(df_loaded)} rows. {len(blocks)} blocks to process.')
137
- if len(blocks)==0:
138
- print('No blocks to process.')
139
- sys.exit(0)
140
-
141
- df = load_metagraphs(blocks[0], blocks[-1], block_step=step_size, datadir=datadir)
142
- if df_loaded is not None:
143
- df = pd.concat([df, df_loaded], ignore_index=True)
144
  df.to_parquet(save_path)
145
  print(f'Saved dataframe to {save_path!r}')
 
10
  import torch
11
  import bittensor
12
  from meta_utils import load_metagraphs
13
+ from meta2frame import make_dataframe
14
  #TODO: make line charts and other cool stuff for each metagraph snapshot
15
 
16
  def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):
 
129
 
130
  if not args.no_dataframe:
131
  save_path = f'data/metagraph/{netuid}/df.parquet'
132
+ blocks = range(start_block, end_block, -step_size)
133
+ print(f'Making a dataframe for {len(blocks)} blocks in {blocks}')
134
+
135
+ df = make_dataframe(netuid = netuid, block_min = min(blocks), block_max = max(blocks), weights = not lite)
 
 
 
 
 
 
 
 
 
136
  df.to_parquet(save_path)
137
  print(f'Saved dataframe to {save_path!r}')