update scripts
- multigraph.py +30 -24
- multistats.py +3 -2
multigraph.py
CHANGED
@@ -15,13 +15,13 @@ from meta_utils import load_metagraphs
 def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):
 
     if subtensor is None:
-        subtensor = bittensor.subtensor(network='
-
-    try:
+        subtensor = bittensor.subtensor(network='archive')
+
+    try:
         metagraph = subtensor.metagraph(block=block, netuid=netuid, lite=lite)
         if difficulty:
             metagraph.difficulty = subtensor.difficulty(block=block, netuid=netuid)
-
+
         if not lite:
             if half:
                 metagraph.weights = torch.nn.Parameter(metagraph.weights.half(), requires_grad=False)
@@ -32,65 +32,71 @@ def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, r
             pickle.dump(metagraph, f)
 
         return metagraph if return_graph else True
-
+
     except Exception as e:
         print(f'Error processing block {block}: {e}')
-
+
 
 def parse_arguments():
     parser = argparse.ArgumentParser(description='Process metagraphs for a given network.')
     parser.add_argument('--netuid', type=int, default=1, help='Network UID to use.')
+    parser.add_argument('--lite', action='store_true', help='Do not include weights.')
     parser.add_argument('--difficulty', action='store_true', help='Include difficulty in metagraph.')
     parser.add_argument('--prune_weights', action='store_true', help='Prune weights in metagraph.')
     parser.add_argument('--return_graph', action='store_true', help='Return metagraph instead of True.')
     parser.add_argument('--no_dataframe', action='store_true', help='Do not create dataframe.')
     parser.add_argument('--max_workers', type=int, default=32, help='Max workers to use.')
     parser.add_argument('--start_block', type=int, default=1_500_000, help='Start block.')
+    parser.add_argument('--num_blocks', type=int, default=0, help='Number of blocks.')
     parser.add_argument('--end_block', type=int, default=600_000, help='End block.')
     parser.add_argument('--step_size', type=int, default=100, help='Step size.')
+    parser.add_argument('--overwrite', action='store_true', help='Overwrite existing files')
     return parser.parse_args()
 
 if __name__ == '__main__':
 
-    subtensor = bittensor.subtensor(network='
+    subtensor = bittensor.subtensor(network='archive')
     print(f'Current block: {subtensor.block}')
-
+
     args = parse_arguments()
-
+    print(args)
+
     netuid=args.netuid
+    lite=args.lite
     difficulty=args.difficulty
-
-
-
+    return_graph=args.return_graph
+
     step_size = args.step_size
     start_block = args.start_block
     start_block = (min(subtensor.block, start_block)//step_size)*step_size # round to nearest step_size
-
+    if args.num_blocks:
+        end_block = start_block - int(args.num_blocks*step_size)
+    else:
+        end_block = args.end_block
+
     blocks = range(start_block, end_block, -step_size)
 
-
-    lite=lambda x: x%500!=0
-
+
     max_workers = min(args.max_workers, len(blocks))
 
     datadir = f'data/metagraph/{netuid}'
     os.makedirs(datadir, exist_ok=True)
-    if not overwrite:
+    if not args.overwrite:
         blocks = [block for block in blocks if not os.path.exists(f'data/metagraph/{netuid}/{block}.pkl')]
 
     metagraphs = []
-
+
     if len(blocks)>0:
-
+
         print(f'Processing {len(blocks)} blocks from {blocks[0]}-{blocks[-1]} using {max_workers} workers.')
-
+
         with ProcessPoolExecutor(max_workers=max_workers) as executor:
             futures = [
-                executor.submit(process, block, lite=lite
+                executor.submit(process, block, lite=args.lite, netuid=netuid, difficulty=difficulty)
                 for block in blocks
            ]
 
-        success = 0
+        success = 0
        with tqdm.tqdm(total=len(futures)) as pbar:
            for block, future in zip(blocks,futures):
                try:
@@ -103,7 +109,7 @@ if __name__ == '__main__':
 
    if not success:
        raise ValueError('No blocks were successfully processed.')
-
+
    print(f'Processed {success} blocks.')
    if return_graph:
        for metagraph in metagraphs:
@@ -125,7 +131,7 @@ if __name__ == '__main__':
    if len(blocks)==0:
        print('No blocks to process.')
        sys.exit(0)
-
+
    df = load_metagraphs(blocks[0], blocks[-1], block_step=step_size, datadir=datadir)
    if df_loaded is not None:
        df = pd.concat([df, df_loaded], ignore_index=True)
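For context, the new --num_blocks option changes how the block range is chosen: instead of always reading --end_block, end_block can be derived by walking back num_blocks snapshots from the rounded start block. A minimal sketch of that selection logic, with purely illustrative values (not part of the commit):

# Sketch of the --num_blocks selection logic; the numbers below are illustrative.
step_size = 100
start_block = 1_500_000                      # already rounded down to a multiple of step_size
num_blocks = 50                              # --num_blocks

if num_blocks:
    end_block = start_block - int(num_blocks * step_size)   # walk back num_blocks snapshots
else:
    end_block = 600_000                      # fall back to the --end_block default

blocks = range(start_block, end_block, -step_size)
print(len(blocks), blocks[0], blocks[-1])    # 50 1500000 1495100

An invocation of the updated script along the lines of python multigraph.py --netuid 1 --num_blocks 50 --overwrite would therefore process 50 snapshots spaced 100 blocks apart and re-write any existing pickles (flag values here are only examples).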
multistats.py
CHANGED
@@ -16,7 +16,7 @@ from IPython.display import display
 api= wandb.Api(timeout=60)
 wandb.login(anonymous="allow")
 
-def pull_wandb_runs(project='openvalidators', filters=None, min_steps=50, max_steps=100_000, ntop=10, netuid=None, summary_filters=None ):
+def pull_wandb_runs(project='opentensor-dev/openvalidators', filters=None, min_steps=50, max_steps=100_000, ntop=10, netuid=None, summary_filters=None ):
    # TODO: speed this up by storing older runs
 
    all_runs = api.runs(project, filters=filters)
@@ -129,7 +129,7 @@ def load_data(run_id, run_path=None, load=True, save=False, explode=True):
 
    # Clean and explode dataframe
    # overwrite object to free memory
-    float_cols = df.filter(regex='reward').columns
+    float_cols = df.filter(regex='reward|filter').columns
    df = explode_data(clean_data(df)).astype({c: float for c in float_cols}).fillna({c: 0 for c in float_cols})
 
    if save:
@@ -156,6 +156,7 @@ def calculate_stats(df_long, freq='H', save_path=None, ntop=3 ):
    run_id = df_long['run_id'].iloc[0]
    # print(f'Calculating stats for run {run_id!r} dataframe with shape {df_long.shape}')
 
+
    # Approximate number of tokens in each completion
    df_long['completion_num_tokens'] = (df_long['completions'].astype(str).str.split().str.len() / 0.75).round()
 
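As a rough illustration of the widened column selection in load_data: pandas' filter(regex=...) keeps every column whose name matches the pattern, so 'reward|filter' now also casts columns containing "filter" to float. The column names below are made up for the example:

import pandas as pd

# Hypothetical columns; only the regex behaviour matters here.
df = pd.DataFrame({
    'rewards': ['1.0', '0.5'],
    'nsfw_filter': ['0.0', None],
    'completions': ['a', 'b'],
})

float_cols = df.filter(regex='reward|filter').columns            # ['rewards', 'nsfw_filter']
df = df.astype({c: float for c in float_cols}).fillna({c: 0 for c in float_cols})
print(df.dtypes.to_dict())    # rewards and nsfw_filter are float64, completions stays object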