Spaces:

macrocosm-os
/

sn1

Paused

App Files Files Community

steffenc committed on Jul 26, 2023

Commit

b45152a

1 Parent(s): 101093d

Major changes for efficiency, detail and presentation

Browse files

Files changed (4) hide show

meta_plotting.py +8 -8
meta_utils.py +20 -9
metagraph.py +0 -169
multistats.py +162 -62

meta_plotting.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import numpy as np
 import plotly.express as px
-def plot_trace(df, col='emission', agg='mean', ntop=10, hotkeys=None, hotkey_regex=None, abbrev=8, type='Miners'):
     if hotkeys is not None:
         df = df.loc[df.hotkey.isin(hotkeys)]
@@ -10,14 +10,14 @@ def plot_trace(df, col='emission', agg='mean', ntop=10, hotkeys=None, hotkey_reg
     top_miners = df.groupby('hotkey')[col].agg(agg).sort_values(ascending=False)
-    stats = df.loc[df.hotkey.isin(top_miners.index[:ntop])].sort_values(by=['timestamp'])
     stats['hotkey_abbrev'] = stats.hotkey.str[:abbrev]
     stats['coldkey_abbrev'] = stats.coldkey.str[:abbrev]
     stats['rank'] = stats.hotkey.map({k:i for i,k in enumerate(top_miners.index, start=1)})
-    return px.line(stats.sort_values(by=['timestamp','rank']),
-                    x='timestamp', y=col, color='coldkey_abbrev', line_group='hotkey_abbrev',
                     hover_data=['hotkey','rank'],
                     labels={col:col.title(),'timestamp':'','coldkey_abbrev':f'Coldkey (first {abbrev} chars)','hotkey_abbrev':f'Hotkey (first {abbrev} chars)'},
                     title=f'Top {ntop} {type}, by {col.title()}',
@@ -25,18 +25,18 @@ def plot_trace(df, col='emission', agg='mean', ntop=10, hotkeys=None, hotkey_reg
                     ).update_traces(opacity=0.7)
-def plot_cabals(df, sel_col='coldkey', count_col='hotkey', values=None, ntop=10, abbr=8):
     if values is None:
         values = df[sel_col].value_counts().sort_values(ascending=False).index[:ntop].tolist()
         print(f'Automatically selected {sel_col!r} = {values!r}')
     df = df.loc[df[sel_col].isin(values)]
-    rates = df.groupby(['timestamp',sel_col])[count_col].nunique().reset_index()
     abbr_col = f'{sel_col} (first {abbr} chars)'
     rates[abbr_col] = rates[sel_col].str[:abbr]
-    return px.line(rates.melt(id_vars=['timestamp',sel_col,abbr_col]),
-            x='timestamp', y='value', color=abbr_col,
             #facet_col='variable',  facet_col_wrap=1,
             labels={'value':f'Number of Unique {count_col.title()}s per {sel_col.title()}','timestamp':''},
             category_orders={abbr_col:[ v[:abbr] for v in values]},

 import numpy as np
 import plotly.express as px
+def plot_trace(df, col='emission', agg='mean', time_col='timestamp', ntop=10, hotkeys=None, hotkey_regex=None, abbrev=8, type='Miners'):
     if hotkeys is not None:
         df = df.loc[df.hotkey.isin(hotkeys)]
     top_miners = df.groupby('hotkey')[col].agg(agg).sort_values(ascending=False)
+    stats = df.loc[df.hotkey.isin(top_miners.index[:ntop])].sort_values(by=time_col)
     stats['hotkey_abbrev'] = stats.hotkey.str[:abbrev]
     stats['coldkey_abbrev'] = stats.coldkey.str[:abbrev]
     stats['rank'] = stats.hotkey.map({k:i for i,k in enumerate(top_miners.index, start=1)})
+    return px.line(stats.sort_values(by=[time_col,'rank']),
+                    x=time_col, y=col, color='coldkey_abbrev', line_group='hotkey_abbrev',
                     hover_data=['hotkey','rank'],
                     labels={col:col.title(),'timestamp':'','coldkey_abbrev':f'Coldkey (first {abbrev} chars)','hotkey_abbrev':f'Hotkey (first {abbrev} chars)'},
                     title=f'Top {ntop} {type}, by {col.title()}',
                     ).update_traces(opacity=0.7)
+def plot_cabals(df, sel_col='coldkey', count_col='hotkey', time_col='timestamp', values=None, ntop=10, abbr=8):
     if values is None:
         values = df[sel_col].value_counts().sort_values(ascending=False).index[:ntop].tolist()
         print(f'Automatically selected {sel_col!r} = {values!r}')
     df = df.loc[df[sel_col].isin(values)]
+    rates = df.groupby([time_col,sel_col])[count_col].nunique().reset_index()
     abbr_col = f'{sel_col} (first {abbr} chars)'
     rates[abbr_col] = rates[sel_col].str[:abbr]
+    return px.line(rates.melt(id_vars=[time_col,sel_col,abbr_col]),
+            x=time_col, y='value', color=abbr_col,
             #facet_col='variable',  facet_col_wrap=1,
             labels={'value':f'Number of Unique {count_col.title()}s per {sel_col.title()}','timestamp':''},
             category_orders={abbr_col:[ v[:abbr] for v in values]},

meta_utils.py CHANGED Viewed

@@ -1,10 +1,15 @@
 import os
 import glob
 import tqdm
-import pickle
 import subprocess
 import pandas as pd
 def run_subprocess(*args):
     # Trigger the multigraph.py script to run and save metagraph snapshots
@@ -18,31 +23,37 @@ def load_metagraph(path, extra_cols=None, rm_cols=None):
     df = pd.DataFrame(metagraph.axons)
     df['block'] = metagraph.block.item()
     df['difficulty'] = metagraph.difficulty
     for c in extra_cols:
         vals = getattr(metagraph,c)
         df[c] = vals
     return df.drop(columns=rm_cols)
 def load_metagraphs(block_start, block_end, block_step=1000, datadir='data/metagraph/1/', extra_cols=None):
     if extra_cols is None:
         extra_cols = ['total_stake','ranks','incentive','emission','consensus','trust','validator_trust','dividends']
     blocks = range(block_start, block_end, block_step)
-    filenames = sorted(path for path in os.listdir(datadir) if int(path.split('.')[0]) in blocks)
     metagraphs = []
     pbar = tqdm.tqdm(filenames)
     for filename in pbar:
         pbar.set_description(f'Processing {filename}')
-        metagraph = load_metagraph(os.path.join(datadir, filename), extra_cols=extra_cols, rm_cols=['protocol','placeholder1','placeholder2'])
-        metagraphs.append(metagraph)
     return pd.concat(metagraphs)
-load_metagraphs(block_start=700_000, block_end=800_000, block_step=1000)

 import os
 import glob
 import tqdm
+import dill as pickle
 import subprocess
 import pandas as pd
+import datetime
+from functools import lru_cache
+block_time_500k = datetime.datetime(2023, 5, 29, 5, 29, 0)
+block_time_800k = datetime.datetime(2023, 7, 9, 21, 32, 48)
+dt = (pd.Timestamp(block_time_800k)-pd.Timestamp(block_time_500k))/(800_000-500_000)
 def run_subprocess(*args):
     # Trigger the multigraph.py script to run and save metagraph snapshots
     df = pd.DataFrame(metagraph.axons)
     df['block'] = metagraph.block.item()
+    df['timestamp'] = block_time_500k + dt*(df['block']-500_000)
     df['difficulty'] = metagraph.difficulty
     for c in extra_cols:
         vals = getattr(metagraph,c)
         df[c] = vals
     return df.drop(columns=rm_cols)
+@lru_cache(maxsize=16)
 def load_metagraphs(block_start, block_end, block_step=1000, datadir='data/metagraph/1/', extra_cols=None):
     if extra_cols is None:
         extra_cols = ['total_stake','ranks','incentive','emission','consensus','trust','validator_trust','dividends']
     blocks = range(block_start, block_end, block_step)
+    print(f'Loading blocks {blocks[0]}-{blocks[-1]} from {datadir}')
+    filenames = sorted(filename for filename in os.listdir(datadir) if int(filename.split('.')[0]) in blocks)
+    print(f'Found {len(filenames)} files in {datadir}')
     metagraphs = []
     pbar = tqdm.tqdm(filenames)
     for filename in pbar:
         pbar.set_description(f'Processing {filename}')
+        try:
+            metagraph = load_metagraph(os.path.join(datadir, filename), extra_cols=extra_cols, rm_cols=['protocol','placeholder1','placeholder2'])
+            metagraphs.append(metagraph)
+        except Exception as e:
+            print(f'filename {filename!r} generated an exception: { e }')
     return pd.concat(metagraphs)

metagraph.py DELETED Viewed

@@ -1,169 +0,0 @@
-import streamlit as st
-from meta_utils import run_subprocess, load_metagraphs
-# from opendashboards.assets import io, inspect, metric, plot
-from meta_plotting import plot_trace, plot_cabals
-DEFAULT_SRC = 'miner'
-DEFAULT_NTOP = 10
-DEFAULT_UID_NTOP = 10
-# Set app config
-st.set_page_config(
-    page_title='Validator Dashboard',
-    menu_items={
-        'Report a bug': "https://github.com/opentensor/dashboards/issues",
-        'About': """
-        This dashboard is part of the OpenTensor project. \n
-        """
-    },
-    layout = "centered"
-    )
-st.title('Metagraph :red[Analysis] Dashboard :eyes:')
-# add vertical space
-st.markdown('#')
-st.markdown('#')
-with st.spinner(text=f'Loading data...'):
-    df = load_metagraphs()
-blocks = df.block.unique()
-# metric.wandb(df_runs)
-# add vertical space
-st.markdown('#')
-st.markdown('#')
-tab1, tab2, tab3, tab4 = st.tabs(["Health", "Miners", "Validators", "Block"])
-### Wandb Runs ###
-with tab1:
-    st.markdown('#')
-    st.header(":violet[Wandb] Runs")
-    run_msg = st.info("Select a single run or compare multiple runs")
-    selected_runs = st.multiselect(f'Runs ({len(df_runs)})', df_runs.id, default=DEFAULT_SELECTED_RUNS, key='runs')
-    # Load data if new runs selected
-    if not selected_runs:
-        # open a dialog to select runs
-        run_msg.error("Please select at least one run")
-        st.snow()
-        st.stop()
-    df = io.load_data(df_runs.loc[df_runs.id.isin(selected_runs)], load=True, save=True)
-    df_long = inspect.explode_data(df)
-    df_weights = inspect.weights(df)
-    metric.runs(df, df_long, selected_runs)
-    with st.expander(f'Show :violet[raw] data for {len(selected_runs)} selected runs'):
-        inspect.run_event_data(df_runs,df, selected_runs)
-### UID Health ###
-with tab2:
-    st.markdown('#')
-    st.header("UID :violet[Health]")
-    st.info(f"Showing UID health metrics for **{len(selected_runs)} selected runs**")
-    uid_src = st.radio('Select one:', ['followup', 'answer'], horizontal=True, key='uid_src')
-    metric.uids(df_long, uid_src)
-    with st.expander(f'Show UID **{uid_src}** weights data for **{len(selected_runs)} selected runs**'):
-        uids = st.multiselect('UID:', sorted(df_long[f'{uid_src}_uids'].unique()), key='uid')
-        st.markdown('#')
-        st.subheader(f"UID {uid_src.title()} :violet[Weights]")
-        plot.weights(
-                df_weights,
-                uids=uids,
-        )
-    with st.expander(f'Show UID **{uid_src}** leaderboard data for **{len(selected_runs)} selected runs**'):
-        st.markdown('#')
-        st.subheader(f"UID {uid_src.title()} :violet[Leaderboard]")
-        uid_col1, uid_col2 = st.columns(2)
-        uid_ntop = uid_col1.slider('Number of UIDs:', min_value=1, max_value=50, value=DEFAULT_UID_NTOP, key='uid_ntop')
-        uid_agg = uid_col2.selectbox('Aggregation:', ('mean','min','max','size','nunique'), key='uid_agg')
-        plot.leaderboard(
-                df,
-                ntop=uid_ntop,
-                group_on=f'{uid_src}_uids',
-                agg_col=f'{uid_src}_rewards',
-                agg=uid_agg
-            )
-    with st.expander(f'Show UID **{uid_src}** diversity data for **{len(selected_runs)} selected runs**'):
-        st.markdown('#')
-        st.subheader(f"UID {uid_src.title()} :violet[Diversity]")
-        rm_failed = st.checkbox(f'Remove failed **{uid_src}** completions', value=True)
-        plot.uid_diversty(df, rm_failed)
-### Completions ###
-with tab3:
-    st.markdown('#')
-    st.subheader('Completion :violet[Leaderboard]')
-    completion_info = st.empty()
-    msg_col1, msg_col2 = st.columns(2)
-    completion_src = msg_col1.radio('Select one:', ['followup', 'answer'], horizontal=True, key='completion_src')
-    completion_info.info(f"Showing **{completion_src}** completions for **{len(selected_runs)} selected runs**")
-    completion_ntop = msg_col2.slider('Top k:', min_value=1, max_value=50, value=DEFAULT_COMPLETION_NTOP, key='completion_ntop')
-    completion_col = f'{completion_src}_completions'
-    reward_col = f'{completion_src}_rewards'
-    uid_col = f'{completion_src}_uids'
-    completions = inspect.completions(df_long, completion_col)
-    # Get completions with highest average rewards
-    plot.leaderboard(
-        df,
-        ntop=completion_ntop,
-        group_on=completion_col,
-        agg_col=reward_col,
-        agg='mean',
-        alias=True
-    )
-    with st.expander(f'Show **{completion_src}** completion rewards data for **{len(selected_runs)} selected runs**'):
-        st.markdown('#')
-        st.subheader('Completion :violet[Rewards]')
-        completion_select = st.multiselect('Completions:', completions.index, default=completions.index[:3].tolist())
-        # completion_regex = st.text_input('Completion regex:', value='', key='completion_regex')
-        plot.completion_rewards(
-            df,
-            completion_col=completion_col,
-            reward_col=reward_col,
-            uid_col=uid_col,
-            ntop=completion_ntop,
-            completions=completion_select,
-        )
-### Prompt-based scoring ###
-with tab4:
-    # coming soon
-    st.info('Prompt-based scoring coming soon')
-    # st.dataframe(df_long_long.filter(regex=prompt_src).head())

multistats.py CHANGED Viewed

@@ -1,23 +1,24 @@
 import os
-import warnings
 import re
 import tqdm
 import wandb
-from traceback import print_exc
 import plotly.express as px
 import pandas as pd
 from concurrent.futures import ProcessPoolExecutor
 import opendashboards.utils.utils as utils
 from IPython.display import display
 api= wandb.Api(timeout=60)
 wandb.login(anonymous="allow")
-def pull_wandb_runs(project='openvalidators', filters=None, min_steps=50, max_steps=100_000, ntop=10, summary_filters=None ):
     # TODO: speed this up by storing older runs
     all_runs = api.runs(project, filters=filters)
     print(f'Using {ntop}/{len(all_runs)} runs with more than {min_steps} events')
     pbar = tqdm.tqdm(all_runs)
@@ -29,6 +30,8 @@ def pull_wandb_runs(project='openvalidators', filters=None, min_steps=50, max_st
         summary = run.summary
         if summary_filters is not None and not summary_filters(summary):
             continue
         step = summary.get('_step',0)
         if step < min_steps or step > max_steps:
             # warnings.warn(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
@@ -60,6 +63,7 @@ def pull_wandb_runs(project='openvalidators', filters=None, min_steps=50, max_st
             'start_time': pd.to_datetime(end_time-duration, unit="s"),
             'end_time': pd.to_datetime(end_time, unit="s"),
             'duration': pd.to_timedelta(duration, unit="s").round('s'),
             **tags
         })
         n_events += step
@@ -85,38 +89,60 @@ def plot_gantt(df_runs):
     fig.update_yaxes(tickfont_size=8, title='')
     fig.show()
-def load_data(run_id, run_path=None, load=True, save=False, timeout=30):
-    file_path = os.path.join('data/runs/',f'history-{run_id}.csv')
     if load and os.path.exists(file_path):
-        df = pd.read_csv(file_path, nrows=None)
         # filter out events with missing step length
         df = df.loc[df.step_length.notna()]
         # detect list columns which are stored as strings
         list_cols = [c for c in df.columns if df[c].dtype == "object" and df[c].str.startswith("[").all()]
         # convert string representation of list to list
-        df[list_cols] = df[list_cols].applymap(eval, na_action='ignore')
     else:
         # Download the history from wandb and add metadata
         run = api.run(run_path)
         df = pd.DataFrame(list(run.scan_history()))
         print(f'Downloaded {df.shape[0]} events from {run_path!r} with id {run_id!r}')
         if save:
-            df.to_csv(file_path, index=False)
     # Convert timestamp to datetime.
     df._timestamp = pd.to_datetime(df._timestamp, unit="s")
     return df.sort_values("_timestamp")
-def calculate_stats(df_long, rm_failed=True, rm_zero_reward=True, freq='H', save_path=None ):
     df_long._timestamp = pd.to_datetime(df_long._timestamp)
     # if dataframe has columns such as followup_completions and answer_completions, convert to multiple rows
     if 'completions' not in df_long.columns:
         df_long.set_index(['_timestamp','run_id'], inplace=True)
@@ -126,79 +152,144 @@ def calculate_stats(df_long, rm_failed=True, rm_zero_reward=True, freq='H', save
         ])
         df_long = df_schema.reset_index()
-    if rm_failed:
-        df_long = df_long.loc[ df_long.completions.str.len()>0 ]
-    if rm_zero_reward:
-        df_long = df_long.loc[ df_long.rewards>0 ]
     print(f'Calculating stats for dataframe with shape {df_long.shape}')
     g = df_long.groupby([pd.Grouper(key='_timestamp', axis=0, freq=freq), 'run_id'])
-    stats = g.agg({'completions':['nunique','count'], 'rewards':['sum','mean','std']})
     stats.columns = ['_'.join(c) for c in stats.columns]
-    stats['completions_diversity'] = stats['completions_nunique'] / stats['completions_count']
     stats = stats.reset_index()
-    if save_path:
         stats.to_csv(save_path, index=False)
     return stats
-def clean_data(df):
-    return df.dropna(subset=df.filter(regex='completions|rewards').columns, how='any').dropna(axis=1, how='all')
-def explode_data(df):
-    list_cols = utils.get_list_col_lengths(df)
-    return utils.explode_data(df, list(list_cols.keys())).apply(pd.to_numeric, errors='ignore')
-def process(run, load=True, save=False, freq='H'):
     try:
         stats_path = f'data/aggs/stats-{run["run_id"]}.csv'
-        if os.path.exists(stats_path):
-            print(f'Loaded stats file {stats_path}')
             return pd.read_csv(stats_path)
         # Load data and add extra columns from wandb run
-        df = load_data(run_id=run['run_id'],
                     run_path=run['run_path'],
                     load=load,
-                    save=save,
-                    save = (run['state'] != 'running') & run['end_time']
                     ).assign(**run.to_dict())
-        # Clean and explode dataframe
-        df_long = explode_data(clean_data(df))
-        # Remove original dataframe from memory
-        del df
         # Get and save stats
-        return calculate_stats(df_long, freq=freq, save_path=stats_path)
     except Exception as e:
-        print(f'Error processing run {run["run_id"]}: {e}')
 if __name__ == '__main__':
     # TODO: flag to overwrite runs that were running when downloaded and saved: check if file date is older than run end time.
     filters = None# {"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
     # filters={'tags': {'$in': ['5F4tQyWrhfGVcNhoqeiNsR6KjD4wMZ2kfhLj4oHYuyHbZAc3']}} # Is foundation validator
-    df_runs = pull_wandb_runs(ntop=500, filters=filters)#summary_filters=lambda s: s.get('augment_prompt'))
     os.makedirs('data/runs/', exist_ok=True)
     os.makedirs('data/aggs/', exist_ok=True)
-    df_runs.to_csv('data/wandb.csv', index=False)
-    display(df_runs)
-    plot_gantt(df_runs)
-    with ProcessPoolExecutor(max_workers=min(32, df_runs.shape[0])) as executor:
-        futures = [executor.submit(process, run, load=True, save=True) for _, run in df_runs.iterrows()]
         # Use tqdm to add a progress bar
         results = []
@@ -208,30 +299,39 @@ if __name__ == '__main__':
                     result = future.result()
                     results.append(result)
                 except Exception as e:
-                    print(f'generated an exception: {print_exc(e)}')
                 pbar.update(1)
     if not results:
         raise ValueError('No runs were successfully processed.')
    # Concatenate the results into a single dataframe
-    df = pd.concat(results, ignore_index=True)
     df.to_csv('data/processed.csv', index=False)
     display(df)
-    fig = px.line(df.astype({'_timestamp':str}),
-              x='_timestamp',
-              y='completions_diversity',
-            #   y=['Unique','Total'],
-        line_group='run_id',
-        # color='hotkey',
-        # color_discrete_sequence=px.colors.sequential.YlGnBu,
-        title='Completion Diversity over Time',
-        labels={'_timestamp':'', 'completions_diversity':'Diversity', 'uids':'UID','value':'counts', 'variable':'Completions'},
-        width=800, height=600,
-        template='plotly_white',
-        ).update_traces(opacity=0.3)
-    fig.show()

 import os
 import re
+import argparse
 import tqdm
 import wandb
+from traceback import format_exc
 import plotly.express as px
 import pandas as pd
 from concurrent.futures import ProcessPoolExecutor
 import opendashboards.utils.utils as utils
+import opendashboards.utils.aggregate as aggregate
 from IPython.display import display
 api= wandb.Api(timeout=60)
 wandb.login(anonymous="allow")
+def pull_wandb_runs(project='openvalidators', filters=None, min_steps=50, max_steps=100_000, ntop=10, netuid=None, summary_filters=None ):
     # TODO: speed this up by storing older runs
     all_runs = api.runs(project, filters=filters)
     print(f'Using {ntop}/{len(all_runs)} runs with more than {min_steps} events')
     pbar = tqdm.tqdm(all_runs)
         summary = run.summary
         if summary_filters is not None and not summary_filters(summary):
             continue
+        if netuid is not None and summary.get('netuid') != netuid:
+            continue
         step = summary.get('_step',0)
         if step < min_steps or step > max_steps:
             # warnings.warn(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
             'start_time': pd.to_datetime(end_time-duration, unit="s"),
             'end_time': pd.to_datetime(end_time, unit="s"),
             'duration': pd.to_timedelta(duration, unit="s").round('s'),
+            'netuid': run.config.get('netuid'),
             **tags
         })
         n_events += step
     fig.update_yaxes(tickfont_size=8, title='')
     fig.show()
+def clean_data(df):
+    return df.dropna(subset=df.filter(regex='completions|rewards').columns, how='any').dropna(axis=1, how='all')
+def explode_data(df):
+    list_cols = utils.get_list_col_lengths(df)
+    return utils.explode_data(df, list(list_cols.keys())).apply(pd.to_numeric, errors='ignore')
+def load_data(run_id, run_path=None, load=True, save=False, explode=True):
+    file_path = os.path.join('data/runs/',f'history-{run_id}.parquet')
     if load and os.path.exists(file_path):
+        df = pd.read_parquet(file_path)
         # filter out events with missing step length
         df = df.loc[df.step_length.notna()]
         # detect list columns which are stored as strings
         list_cols = [c for c in df.columns if df[c].dtype == "object" and df[c].str.startswith("[").all()]
         # convert string representation of list to list
+        # df[list_cols] = df[list_cols].apply(lambda x: eval(x, {'__builtins__': None}) if pd.notna(x) else x)
+        try:
+            df[list_cols] = df[list_cols].applymap(eval, na_action='ignore')
+        except ValueError as e:
+            print(f'Error loading {file_path!r} when converting columns {list_cols} to list: {e}')
     else:
         # Download the history from wandb and add metadata
         run = api.run(run_path)
         df = pd.DataFrame(list(run.scan_history()))
+        # Remove rows with missing completions or rewards; these rows are weight-related events
+        df.dropna(subset=df.filter(regex='completions|rewards').columns, how='any', inplace=True)
         print(f'Downloaded {df.shape[0]} events from {run_path!r} with id {run_id!r}')
+        # Clean and explode dataframe
+        # overwrite object to free memory
+        float_cols = df.filter(regex='reward').columns
+        df = explode_data(clean_data(df)).astype({c: float for c in float_cols}).fillna({c: 0 for c in float_cols})
         if save:
+            df.to_parquet(file_path, index=False)
     # Convert timestamp to datetime.
     df._timestamp = pd.to_datetime(df._timestamp, unit="s")
     return df.sort_values("_timestamp")
+def calculate_stats(df_long, freq='H', save_path=None, ntop=3 ):
     df_long._timestamp = pd.to_datetime(df_long._timestamp)
     # if dataframe has columns such as followup_completions and answer_completions, convert to multiple rows
     if 'completions' not in df_long.columns:
         df_long.set_index(['_timestamp','run_id'], inplace=True)
         ])
         df_long = df_schema.reset_index()
     print(f'Calculating stats for dataframe with shape {df_long.shape}')
+    # Approximate number of tokens in each completion
+    df_long['completion_num_tokens'] = (df_long['completions'].str.split().str.len() / 0.75).round()
     g = df_long.groupby([pd.Grouper(key='_timestamp', axis=0, freq=freq), 'run_id'])
+    # TODO: use named aggregations
+    reward_aggs = ['sum','mean','std','median','max',aggregate.nonzero_rate, aggregate.nonzero_mean, aggregate.nonzero_std, aggregate.nonzero_median]
+    aggs = {
+        'completions': ['nunique','count', aggregate.diversity, aggregate.successful_diversity, aggregate.success_rate],
+        'completion_num_tokens': ['mean', 'std', 'median', 'max'],
+        **{k: reward_aggs for k in df_long.filter(regex='reward')}
+    }
+    # Calculate tokens per second
+    if 'completion_times' in df_long.columns:
+        df_long['tokens_per_sec'] = df_long['completion_num_tokens']/df_long['completion_times']
+        aggs.update({
+            'completion_times': ['mean','std','median','min','max'],
+            'tokens_per_sec': ['mean','std','median','max'],
+        })
+    stats = g.agg(aggs)
+    stats = stats.merge(g.apply(aggregate.top_stats, exclude='', ntop=ntop).reset_index(level=1,drop=True), left_index=True, right_index=True)
+    # flatten multiindex columns
     stats.columns = ['_'.join(c) for c in stats.columns]
     stats = stats.reset_index()
+    if save_path:
         stats.to_csv(save_path, index=False)
     return stats
+def process(run, load=True, save=False, load_stats=True, freq='H', ntop=3):
     try:
         stats_path = f'data/aggs/stats-{run["run_id"]}.csv'
+        if load_stats and os.path.exists(stats_path):
+            print(f'Loaded stats file {stats_path!r}')
             return pd.read_csv(stats_path)
         # Load data and add extra columns from wandb run
+        df_long = load_data(run_id=run['run_id'],
                     run_path=run['run_path'],
                     load=load,
+                    save=save,
+                    # save = (run['state'] != 'running') & run['end_time']
                     ).assign(**run.to_dict())
+        assert isinstance(df_long, pd.DataFrame), f'Expected dataframe, but got {type(df_long)}'
         # Get and save stats
+        return calculate_stats(df_long, freq=freq, save_path=stats_path, ntop=ntop)
     except Exception as e:
+        print(f'Error processing run {run["run_id"]}: { format_exc(e) }')
+def line_chart(df, col, title=None):
+    title = title or col.replace('_',' ').title()
+    fig = px.line(df.astype({'_timestamp':str}),
+            x='_timestamp', y=col,
+            line_group='run_id',
+            title=f'{title} over Time',
+            labels={'_timestamp':'', col: title, 'uids':'UID','value':'counts', 'variable':'Completions'},
+            width=800, height=600,
+            template='plotly_white',
+        ).update_traces(opacity=0.2)
+    fig.write_image(f'data/figures/{col}.png')
+    fig.write_html(f'data/figures/{col}.html')
+    return col
+def parse_arguments():
+    parser = argparse.ArgumentParser(description='Process wandb validator runs for a given netuid.')
+    parser.add_argument('--load_runs',action='store_true', help='Load runs from file.')
+    parser.add_argument('--repull_unfinished',action='store_true', help='Re-pull runs that were running when downloaded and saved.')
+    parser.add_argument('--netuid', type=int, default=None, help='Network UID to use.')
+    parser.add_argument('--ntop', type=int, default=1000, help='Number of runs to process.')
+    parser.add_argument('--min_steps', type=int, default=100, help='Minimum number of steps to include.')
+    parser.add_argument('--max_workers', type=int, default=32, help='Max workers to use.')
+    parser.add_argument('--no_plot',action='store_true', help='Prevent plotting.')
+    parser.add_argument('--no_save',action='store_true', help='Prevent saving data to file.')
+    parser.add_argument('--no_load',action='store_true', help='Prevent loading downloaded data from file.')
+    parser.add_argument('--no_load_stats',action='store_true', help='Prevent loading stats data from file.')
+    parser.add_argument('--freq', type=str, default='H', help='Frequency to aggregate data.')
+    parser.add_argument('--completions_ntop', type=int, default=3, help='Number of top completions to include in stats.')
+    return parser.parse_args()
 if __name__ == '__main__':
     # TODO: flag to overwrite runs that were running when downloaded and saved: check if file date is older than run end time.
+    args = parse_arguments()
+    print(args)
     filters = None# {"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
     # filters={'tags': {'$in': ['5F4tQyWrhfGVcNhoqeiNsR6KjD4wMZ2kfhLj4oHYuyHbZAc3']}} # Is foundation validator
+    if args.load_runs and os.path.exists('data/wandb.csv'):
+        df_runs = pd.read_csv('data/wandb.csv')
+        assert len(df_runs) >= args.ntop, f'Loaded {len(df_runs)} runs, but expected at least {args.ntop}'
+        df_runs = df_runs.iloc[:args.ntop]
+    else:
+        df_runs = pull_wandb_runs(ntop=args.ntop,
+                                min_steps=args.min_steps,
+                                netuid=args.netuid,
+                                filters=filters
+                                )#summary_filters=lambda s: s.get('augment_prompt'))
+        df_runs.to_csv('data/wandb.csv', index=False)
     os.makedirs('data/runs/', exist_ok=True)
     os.makedirs('data/aggs/', exist_ok=True)
+    os.makedirs('data/figures/', exist_ok=True)
+    display(df_runs)
+    if not args.no_plot:
+        plot_gantt(df_runs)
+    with ProcessPoolExecutor(max_workers=min(args.max_workers, df_runs.shape[0])) as executor:
+        futures = [executor.submit(
+                            process,
+                            run,
+                            load=not args.no_load,
+                            save=not args.no_save,
+                            load_stats=not args.no_load_stats,
+                            freq=args.freq,
+                            ntop=args.completions_ntop
+                    )
+                   for _, run in df_runs.iterrows()
+                   ]
         # Use tqdm to add a progress bar
         results = []
                     result = future.result()
                     results.append(result)
                 except Exception as e:
+                    print(f'generated an exception: {format_exc(e)}')
                 pbar.update(1)
     if not results:
         raise ValueError('No runs were successfully processed.')
    # Concatenate the results into a single dataframe
+    df = pd.concat(results, ignore_index=True).sort_values(['_timestamp','run_id'], ignore_index=True)
     df.to_csv('data/processed.csv', index=False)
+    print(f'Saved {df.shape[0]} rows to data/processed.csv')
     display(df)
+    if not args.no_plot:
+        plots = []
+        cols = df.set_index(['run_id','_timestamp']).columns
+        with ProcessPoolExecutor(max_workers=min(args.max_workers, len(cols))) as executor:
+            futures = [executor.submit(line_chart, df, c) for c in cols]
+            # Use tqdm to add a progress bar
+            results = []
+            with tqdm.tqdm(total=len(futures)) as pbar:
+                for future in futures:
+                    try:
+                        result = future.result()
+                        plots.append(result)
+                    except Exception as e:
+                        print(f'generated an exception: {format_exc(e)}')
+                    pbar.update(1)
+        print(f'Saved {len(plots)} plots to data/figures/')