Spaces:

macrocosm-os
/

sn1

Paused

steffenc committed on Mar 8, 2024

Commit

08fa697

1 Parent(s): 0ffd9af

Fix dataframe construction bug and improve logging/tqdm outputs

Files changed (2) hide show

meta2frame.py CHANGED Viewed

@@ -21,8 +21,8 @@ def load_metagraphs(root_dir, netuid, block_min=0, block_max=3_000_000):
     print(f'Found {len(files)} metagraphs in {match_path}')
     valid_files = [path for path in files if block_min <= int(path.split('/')[-1].split('.')[0]) <= block_max]
-    print(f'Found {len(valid_files)} valid metagraphs between {block_min} and {block_max}')
-    for path in tqdm.tqdm(valid_files):
         with open(path, 'rb') as f:
             metagraph = pickle.load(f)
@@ -48,7 +48,7 @@ def block_to_time(blocks, subtensor=None):
     timestamps = {}
     unique_blocks = set(blocks)
-    for block in tqdm.tqdm(unique_blocks):
         timestamps[block] = get_block_timestamp(block, subtensor)
     return blocks.map(timestamps).apply(pd.to_datetime, unit='ms')
@@ -59,7 +59,7 @@ def make_dataframe(netuid, root_dir=ROOT_DIR, cols=None, block_min=0, block_max=
         cols = ['stake','emission','trust','validator_trust','dividends','incentive','R', 'consensus','validator_permit']
     frames = []
     metagraphs = load_metagraphs(root_dir, netuid, block_min, block_max)
-    print(f'Loaded {len(metagraphs)} metagraphs for netuid {netuid}')
     for m in metagraphs:
         frame = pd.DataFrame({k: getattr(m, k) for k in cols})
         frame['block'] = m.block.item()

     print(f'Found {len(files)} metagraphs in {match_path}')
     valid_files = [path for path in files if block_min <= int(path.split('/')[-1].split('.')[0]) <= block_max]
+    pbar = tqdm.tqdm(valid_files, desc=f'Loading {len(valid_files)} metagraph snapshots')
+    for path in pbar:
         with open(path, 'rb') as f:
             metagraph = pickle.load(f)
     timestamps = {}
     unique_blocks = set(blocks)
+    for block in tqdm.tqdm(unique_blocks, desc=f'Mapping {len(unique_blocks)} blocks to timestamps'):
         timestamps[block] = get_block_timestamp(block, subtensor)
     return blocks.map(timestamps).apply(pd.to_datetime, unit='ms')
         cols = ['stake','emission','trust','validator_trust','dividends','incentive','R', 'consensus','validator_permit']
     frames = []
     metagraphs = load_metagraphs(root_dir, netuid, block_min, block_max)
     for m in metagraphs:
         frame = pd.DataFrame({k: getattr(m, k) for k in cols})
         frame['block'] = m.block.item()

multigraph.py CHANGED Viewed

@@ -10,6 +10,7 @@ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 import torch
 import bittensor
 from meta_utils import load_metagraphs
 #TODO: make line charts and other cool stuff for each metagraph snapshot
 def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):
@@ -128,18 +129,9 @@ if __name__ == '__main__':
     if not args.no_dataframe:
         save_path = f'data/metagraph/{netuid}/df.parquet'
-        blocks = range(start_block, end_block, step_size)
-        df_loaded = None
-        if os.path.exists(save_path):
-            df_loaded = pd.read_parquet(save_path)
-            blocks = [block for block in blocks if block not in df_loaded.block.unique()]
-            print(f'Loaded dataframe from {save_path!r}. {len(df_loaded)} rows. {len(blocks)} blocks to process.')
-            if len(blocks)==0:
-                print('No blocks to process.')
-                sys.exit(0)
-        df = load_metagraphs(blocks[0], blocks[-1], block_step=step_size, datadir=datadir)
-        if df_loaded is not None:
-            df = pd.concat([df, df_loaded], ignore_index=True)
         df.to_parquet(save_path)
         print(f'Saved dataframe to {save_path!r}')

 import torch
 import bittensor
 from meta_utils import load_metagraphs
+from meta2frame import make_dataframe
 #TODO: make line charts and other cool stuff for each metagraph snapshot
 def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):
     if not args.no_dataframe:
         save_path = f'data/metagraph/{netuid}/df.parquet'
+        blocks = range(start_block, end_block, -step_size)
+        print(f'Making a dataframe for {len(blocks)} blocks in {blocks}')
+        df = make_dataframe(netuid = netuid, block_min = min(blocks), block_max = max(blocks), weights = not lite)
         df.to_parquet(save_path)
         print(f'Saved dataframe to {save_path!r}')