steffenc committed
Commit e5a977e · Parent: 8810468

update scripts

Files changed (2):
  1. multigraph.py +30 -24
  2. multistats.py +3 -2
multigraph.py CHANGED
@@ -15,13 +15,13 @@ from meta_utils import load_metagraphs
 def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):
 
     if subtensor is None:
-        subtensor = bittensor.subtensor(network='finney')
-
-    try:
+        subtensor = bittensor.subtensor(network='archive')
+
+    try:
         metagraph = subtensor.metagraph(block=block, netuid=netuid, lite=lite)
         if difficulty:
             metagraph.difficulty = subtensor.difficulty(block=block, netuid=netuid)
-
+
         if not lite:
             if half:
                 metagraph.weights = torch.nn.Parameter(metagraph.weights.half(), requires_grad=False)
@@ -32,65 +32,71 @@ def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):
             pickle.dump(metagraph, f)
 
         return metagraph if return_graph else True
-
+
     except Exception as e:
         print(f'Error processing block {block}: {e}')
-
+
 
 def parse_arguments():
     parser = argparse.ArgumentParser(description='Process metagraphs for a given network.')
     parser.add_argument('--netuid', type=int, default=1, help='Network UID to use.')
+    parser.add_argument('--lite', action='store_true', help='Do not include weights.')
     parser.add_argument('--difficulty', action='store_true', help='Include difficulty in metagraph.')
     parser.add_argument('--prune_weights', action='store_true', help='Prune weights in metagraph.')
     parser.add_argument('--return_graph', action='store_true', help='Return metagraph instead of True.')
     parser.add_argument('--no_dataframe', action='store_true', help='Do not create dataframe.')
     parser.add_argument('--max_workers', type=int, default=32, help='Max workers to use.')
     parser.add_argument('--start_block', type=int, default=1_500_000, help='Start block.')
+    parser.add_argument('--num_blocks', type=int, default=0, help='Number of blocks.')
     parser.add_argument('--end_block', type=int, default=600_000, help='End block.')
     parser.add_argument('--step_size', type=int, default=100, help='Step size.')
+    parser.add_argument('--overwrite', action='store_true', help='Overwrite existing files')
     return parser.parse_args()
 
 if __name__ == '__main__':
 
-    subtensor = bittensor.subtensor(network='finney')
+    subtensor = bittensor.subtensor(network='archive')
     print(f'Current block: {subtensor.block}')
-
+
     args = parse_arguments()
-
+    print(args)
+
     netuid=args.netuid
+    lite=args.lite
     difficulty=args.difficulty
-    overwrite=False
-    return_graph=args.return_graph
-
+    return_graph=args.return_graph
+
     step_size = args.step_size
     start_block = args.start_block
     start_block = (min(subtensor.block, start_block)//step_size)*step_size # round to nearest step_size
-    end_block = args.end_block
+    if args.num_blocks:
+        end_block = start_block - int(args.num_blocks*step_size)
+    else:
+        end_block = args.end_block
+
     blocks = range(start_block, end_block, -step_size)
 
-    # only get weights for multiple of 500 blocks
-    lite=lambda x: x%500!=0
-
+
     max_workers = min(args.max_workers, len(blocks))
 
     datadir = f'data/metagraph/{netuid}'
     os.makedirs(datadir, exist_ok=True)
-    if not overwrite:
+    if not args.overwrite:
         blocks = [block for block in blocks if not os.path.exists(f'data/metagraph/{netuid}/{block}.pkl')]
 
     metagraphs = []
-
+
     if len(blocks)>0:
-
+
         print(f'Processing {len(blocks)} blocks from {blocks[0]}-{blocks[-1]} using {max_workers} workers.')
-
+
         with ProcessPoolExecutor(max_workers=max_workers) as executor:
             futures = [
-                executor.submit(process, block, lite=lite(block), netuid=netuid, difficulty=difficulty)
+                executor.submit(process, block, lite=args.lite, netuid=netuid, difficulty=difficulty)
                 for block in blocks
             ]
 
-        success = 0
+        success = 0
         with tqdm.tqdm(total=len(futures)) as pbar:
             for block, future in zip(blocks,futures):
                 try:
@@ -103,7 +109,7 @@ if __name__ == '__main__':
 
         if not success:
             raise ValueError('No blocks were successfully processed.')
-
+
         print(f'Processed {success} blocks.')
         if return_graph:
             for metagraph in metagraphs:
@@ -125,7 +131,7 @@ if __name__ == '__main__':
     if len(blocks)==0:
         print('No blocks to process.')
         sys.exit(0)
-
+
     df = load_metagraphs(blocks[0], blocks[-1], block_step=step_size, datadir=datadir)
    if df_loaded is not None:
         df = pd.concat([df, df_loaded], ignore_index=True)
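The --num_blocks branch added above derives end_block by stepping back from start_block, instead of relying on the fixed --end_block default. A minimal sketch of the arithmetic, with illustrative values rather than anything taken from the commit:

# Sketch of the new block-range logic; values are illustrative.
step_size = 100
start_block = (1_500_000 // step_size) * step_size     # round down to a step boundary, as in the script
num_blocks = 5
end_block = start_block - int(num_blocks * step_size)  # 1_499_500

# Walk backwards from the tip, one snapshot per step_size blocks.
blocks = range(start_block, end_block, -step_size)
print(list(blocks))  # [1500000, 1499900, 1499800, 1499700, 1499600]

Because range excludes its stop value, --num_blocks N yields exactly N snapshots.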
 
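A hypothetical invocation of the updated script, using only flags defined in parse_arguments() above (the values are illustrative):

python multigraph.py --netuid 1 --lite --start_block 1500000 --num_blocks 100 --step_size 100 --max_workers 32

Note that --lite now applies uniformly to every worker: the old behaviour of fetching full weights only for every 500th block (lite=lambda x: x%500!=0) is removed, so weights are fetched either for all sampled blocks or for none.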
multistats.py CHANGED
@@ -16,7 +16,7 @@ from IPython.display import display
 api= wandb.Api(timeout=60)
 wandb.login(anonymous="allow")
 
-def pull_wandb_runs(project='openvalidators', filters=None, min_steps=50, max_steps=100_000, ntop=10, netuid=None, summary_filters=None ):
+def pull_wandb_runs(project='opentensor-dev/openvalidators', filters=None, min_steps=50, max_steps=100_000, ntop=10, netuid=None, summary_filters=None ):
     # TODO: speed this up by storing older runs
 
     all_runs = api.runs(project, filters=filters)
@@ -129,7 +129,7 @@ def load_data(run_id, run_path=None, load=True, save=False, explode=True):
 
     # Clean and explode dataframe
     # overwrite object to free memory
-    float_cols = df.filter(regex='reward').columns
+    float_cols = df.filter(regex='reward|filter').columns
     df = explode_data(clean_data(df)).astype({c: float for c in float_cols}).fillna({c: 0 for c in float_cols})
 
     if save:
@@ -156,6 +156,7 @@ def calculate_stats(df_long, freq='H', save_path=None, ntop=3 ):
     run_id = df_long['run_id'].iloc[0]
     # print(f'Calculating stats for run {run_id!r} dataframe with shape {df_long.shape}')
 
+
     # Approximate number of tokens in each completion
     df_long['completion_num_tokens'] = (df_long['completions'].astype(str).str.split().str.len() / 0.75).round()
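Broadening the regex from 'reward' to 'reward|filter' widens the set of columns that load_data coerces to float and zero-fills. A minimal sketch of that coercion on a toy frame (the column names here are hypothetical, not taken from the repository):

import pandas as pd

# 'rewards' matches 'reward'; 'nsfw_filter' matches 'filter'.
df = pd.DataFrame({'completions': ['a b c'], 'rewards': [None], 'nsfw_filter': [None]})

float_cols = df.filter(regex='reward|filter').columns
df = df.astype({c: float for c in float_cols}).fillna({c: 0 for c in float_cols})
print(df.dtypes)  # rewards and nsfw_filter are now float64, with missing values filled as 0.0

The / 0.75 in calculate_stats reflects the common rule of thumb that one token is roughly 0.75 English words, so dividing the word count by 0.75 approximates the token count.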
 
 