steffenc committed
Commit d048dd7 · unverified · 2 Parent(s): 163582f 507e3f7

Merge pull request #4 from opentensor/aggregations

dashboard.py CHANGED
@@ -1,13 +1,16 @@
 
 
1
  import streamlit as st
2
  from opendashboards.assets import io, inspect, metric, plot
3
 
4
- # dendrite time versus completion length
5
  # prompt-based completion score stats
6
  # introspect specific RUN-UID-COMPLETION
 
 
7
 
8
  DEFAULT_PROJECT = "openvalidators"
9
- DEFAULT_FILTERS = {"tags": {"$in": ["1.0.0", "1.0.1", "1.0.2", "1.0.3", "1.0.4"]}}
10
- DEFAULT_SELECTED_RUNS = ['kt9bzxii']
11
  DEFAULT_SRC = 'followup'
12
  DEFAULT_COMPLETION_NTOP = 10
13
  DEFAULT_UID_NTOP = 10
@@ -31,8 +34,6 @@ st.title('Validator :red[Analysis] Dashboard :eyes:')
31
  st.markdown('#')
32
  st.markdown('#')
33
 
34
- # with st.sidebar:
35
- # st.sidebar.header('Pages')
36
 
37
  with st.spinner(text=f'Checking wandb...'):
38
  df_runs = io.load_runs(project=DEFAULT_PROJECT, filters=DEFAULT_FILTERS, min_steps=10)
@@ -43,58 +44,83 @@ metric.wandb(df_runs)
43
  st.markdown('#')
44
  st.markdown('#')
45
 
46
- tab1, tab2, tab3, tab4 = st.tabs(["Wandb Runs", "UID Health", "Completions", "Prompt-based scoring"])
47
-
48
 
49
  ### Wandb Runs ###
50
  with tab1:
51
 
52
  st.markdown('#')
53
- st.header(":violet[Wandb] Runs")
54
-
55
- run_msg = st.info("Select a single run or compare multiple runs")
56
- selected_runs = st.multiselect(f'Runs ({len(df_runs)})', df_runs.id, default=DEFAULT_SELECTED_RUNS, key='runs')
57
-
58
- # Load data if new runs selected
59
- if not selected_runs:
60
- # open a dialog to select runs
61
- run_msg.error("Please select at least one run")
62
- st.snow()
63
  st.stop()
64
 
65
- df = io.load_data(df_runs.loc[df_runs.id.isin(selected_runs)], load=True, save=True)
66
- df_long = inspect.explode_data(df)
67
- df_weights = inspect.weights(df)
68
-
69
- metric.runs(df, df_long, selected_runs)
70
 
71
- with st.expander(f'Show :violet[raw] data for {len(selected_runs)} selected runs'):
72
- inspect.run_event_data(df_runs,df, selected_runs)
73
 
 
74
 
75
  ### UID Health ###
 
 
76
  with tab2:
77
 
78
  st.markdown('#')
79
- st.header("UID :violet[Health]")
80
- st.info(f"Showing UID health metrics for **{len(selected_runs)} selected runs**")
81
 
82
- uid_src = st.radio('Select one:', ['followup', 'answer'], horizontal=True, key='uid_src')
83
 
84
- metric.uids(df_long, uid_src)
85
 
86
- with st.expander(f'Show UID **{uid_src}** weights data for **{len(selected_runs)} selected runs**'):
87
 
88
- uids = st.multiselect('UID:', sorted(df_long[f'{uid_src}_uids'].unique()), key='uid')
89
- st.markdown('#')
90
- st.subheader(f"UID {uid_src.title()} :violet[Weights]")
91
 
92
- plot.weights(
93
- df_weights,
94
- uids=uids,
95
- )
96
-
97
- with st.expander(f'Show UID **{uid_src}** leaderboard data for **{len(selected_runs)} selected runs**'):
98
 
99
  st.markdown('#')
100
  st.subheader(f"UID {uid_src.title()} :violet[Leaderboard]")
@@ -103,15 +129,15 @@ with tab2:
103
  uid_agg = uid_col2.selectbox('Aggregation:', ('mean','min','max','size','nunique'), key='uid_agg')
104
 
105
  plot.leaderboard(
106
- df,
107
  ntop=uid_ntop,
108
- group_on=f'{uid_src}_uids',
109
- agg_col=f'{uid_src}_rewards',
110
  agg=uid_agg
111
  )
112
 
113
 
114
- with st.expander(f'Show UID **{uid_src}** diversity data for **{len(selected_runs)} selected runs**'):
115
 
116
  st.markdown('#')
117
  st.subheader(f"UID {uid_src.title()} :violet[Diversity]")
@@ -127,28 +153,27 @@ with tab3:
127
  completion_info = st.empty()
128
 
129
  msg_col1, msg_col2 = st.columns(2)
130
- completion_src = msg_col1.radio('Select one:', ['followup', 'answer'], horizontal=True, key='completion_src')
131
- completion_info.info(f"Showing **{completion_src}** completions for **{len(selected_runs)} selected runs**")
 
132
 
133
- completion_ntop = msg_col2.slider('Top k:', min_value=1, max_value=50, value=DEFAULT_COMPLETION_NTOP, key='completion_ntop')
134
 
135
- completion_col = f'{completion_src}_completions'
136
- reward_col = f'{completion_src}_rewards'
137
- uid_col = f'{completion_src}_uids'
138
 
139
- completions = inspect.completions(df_long, completion_col)
140
 
141
  # Get completions with highest average rewards
142
  plot.leaderboard(
143
- df,
144
  ntop=completion_ntop,
145
- group_on=completion_col,
146
- agg_col=reward_col,
147
  agg='mean',
148
  alias=True
149
  )
150
 
151
- with st.expander(f'Show **{completion_src}** completion rewards data for **{len(selected_runs)} selected runs**'):
152
 
153
  st.markdown('#')
154
  st.subheader('Completion :violet[Rewards]')
@@ -157,19 +182,37 @@ with tab3:
157
  # completion_regex = st.text_input('Completion regex:', value='', key='completion_regex')
158
 
159
  plot.completion_rewards(
160
- df,
161
- completion_col=completion_col,
162
- reward_col=reward_col,
163
- uid_col=uid_col,
164
  ntop=completion_ntop,
165
  completions=completion_select,
166
  )
 
 
167
 
168
 
169
  ### Prompt-based scoring ###
170
  with tab4:
171
  # coming soon
172
  st.info('Prompt-based scoring coming soon')
 
173
 
174
  # st.dataframe(df_long_long.filter(regex=prompt_src).head())
175
 
 
1
+ import time
2
+ import pandas as pd
3
  import streamlit as st
4
  from opendashboards.assets import io, inspect, metric, plot
5
 
 
6
  # prompt-based completion score stats
7
  # introspect specific RUN-UID-COMPLETION
8
+ # cache individual file loads
9
+ # Hotkey churn
10
 
11
  DEFAULT_PROJECT = "openvalidators"
12
+ DEFAULT_FILTERS = {"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
13
+ DEFAULT_SELECTED_HOTKEYS = None
14
  DEFAULT_SRC = 'followup'
15
  DEFAULT_COMPLETION_NTOP = 10
16
  DEFAULT_UID_NTOP = 10
 
34
  st.markdown('#')
35
  st.markdown('#')
36
 
 
 
37
 
38
  with st.spinner(text=f'Checking wandb...'):
39
  df_runs = io.load_runs(project=DEFAULT_PROJECT, filters=DEFAULT_FILTERS, min_steps=10)
 
44
  st.markdown('#')
45
  st.markdown('#')
46
 
47
+ tab1, tab2, tab3, tab4 = st.tabs(["Raw Data", "UID Health", "Completions", "Prompt-based scoring"])
 
48
 
49
  ### Wandb Runs ###
50
  with tab1:
51
 
52
  st.markdown('#')
53
+ st.subheader(":violet[Run] Data")
54
+ with st.expander(f'Show :violet[raw] wandb data'):
55
+
56
+ edited_df = st.data_editor(
57
+ df_runs.assign(Select=False).set_index('Select'),
58
+ column_config={"Select": st.column_config.CheckboxColumn(required=True)},
59
+ disabled=df_runs.columns,
60
+ use_container_width=True,
61
+ )
62
+ df_runs_subset = df_runs[edited_df.index==True]
63
+ n_runs = len(df_runs_subset)
64
+
65
+ if n_runs:
66
+ df = io.load_data(df_runs_subset, load=True, save=True)
67
+ df = inspect.clean_data(df)
68
+ print(f'\nNans in columns: {df.isna().sum()}')
69
+ df_long = inspect.explode_data(df)
70
+ else:
71
+ st.info(f'You must select at least one run to load data')
72
  st.stop()
73
 
74
+ metric.runs(df_long)
75
 
76
+ st.markdown('#')
77
+ st.subheader(":violet[Event] Data")
78
+ with st.expander(f'Show :violet[raw] event data for **{n_runs} selected runs**'):
79
+ raw_data_col1, raw_data_col2 = st.columns(2)
80
+ use_long_checkbox = raw_data_col1.checkbox('Use long format', value=True)
81
+ num_rows = raw_data_col2.slider('Number of rows:', min_value=1, max_value=100, value=10, key='num_rows')
82
+ st.dataframe(df_long.head(num_rows) if use_long_checkbox else df.head(num_rows),
83
+ use_container_width=True)
84
 
85
+ step_types = ['all']+['augment','followup','answer']#list(df.name.unique())
86
 
87
  ### UID Health ###
88
+ # TODO: Live time - time elapsed since moving_averaged_score for selected UID was 0 (lower bound so use >Time)
89
+ # TODO: Weight - Most recent weight for selected UID (Add warning if weight is 0 or most recent timestamp is not current)
90
  with tab2:
91
 
92
  st.markdown('#')
93
+ st.subheader("UID :violet[Health]")
94
+ st.info(f"Showing UID health metrics for **{n_runs} selected runs**")
95
 
96
+ uid_src = st.radio('Select event type:', step_types, horizontal=True, key='uid_src')
97
+ df_uid = df_long[df_long.name.str.contains(uid_src)] if uid_src != 'all' else df_long
98
+
99
+ metric.uids(df_uid, uid_src)
100
+ uids = st.multiselect('UID:', sorted(df_uid['uids'].unique()), key='uid')
101
+ with st.expander(f'Show UID health data for **{n_runs} selected runs** and **{len(uids)} selected UIDs**'):
102
+ st.markdown('#')
103
+ st.subheader(f"UID {uid_src.title()} :violet[Health]")
104
+ agg_uid_checkbox = st.checkbox('Aggregate UIDs', value=True)
105
+ if agg_uid_checkbox:
106
+ metric.uids(df_uid, uid_src, uids)
107
+ else:
108
+ for uid in uids:
109
+ st.caption(f'UID: {uid}')
110
+ metric.uids(df_uid, uid_src, [uid])
111
 
112
+ st.subheader(f'Cumulative completion frequency')
113
 
114
+ freq_col1, freq_col2 = st.columns(2)
115
+ freq_ntop = freq_col1.slider('Number of Completions:', min_value=10, max_value=1000, value=100, key='freq_ntop')
116
+ freq_rm_empty = freq_col2.checkbox('Remove empty (failed)', value=True, key='freq_rm_empty')
117
+ freq_cumulative = freq_col2.checkbox('Cumulative', value=False, key='freq_cumulative')
118
+ freq_normalize = freq_col2.checkbox('Normalize', value=True, key='freq_normalize')
119
 
120
+ plot.uid_completion_counts(df_uid, uids=uids, src=uid_src, ntop=freq_ntop, rm_empty=freq_rm_empty, cumulative=freq_cumulative, normalize=freq_normalize)
 
 
121
 
122
+
123
+ with st.expander(f'Show UID **{uid_src}** leaderboard data for **{n_runs} selected runs**'):
124
 
125
  st.markdown('#')
126
  st.subheader(f"UID {uid_src.title()} :violet[Leaderboard]")
 
129
  uid_agg = uid_col2.selectbox('Aggregation:', ('mean','min','max','size','nunique'), key='uid_agg')
130
 
131
  plot.leaderboard(
132
+ df_uid,
133
  ntop=uid_ntop,
134
+ group_on='uids',
135
+ agg_col='rewards',
136
  agg=uid_agg
137
  )
138
 
139
 
140
+ with st.expander(f'Show UID **{uid_src}** diversity data for **{n_runs} selected runs**'):
141
 
142
  st.markdown('#')
143
  st.subheader(f"UID {uid_src.title()} :violet[Diversity]")
 
153
  completion_info = st.empty()
154
 
155
  msg_col1, msg_col2 = st.columns(2)
156
+ # completion_src = msg_col1.radio('Select one:', ['followup', 'answer'], horizontal=True, key='completion_src')
157
+ completion_src = st.radio('Select event type:', step_types, horizontal=True, key='completion_src')
158
+ df_comp = df_long[df_long.name.str.contains(completion_src)] if completion_src != 'all' else df_long
159
 
160
+ completion_info.info(f"Showing **{completion_src}** completions for **{n_runs} selected runs**")
161
 
162
+ completion_ntop = msg_col2.slider('Top k:', min_value=1, max_value=50, value=DEFAULT_COMPLETION_NTOP, key='completion_ntop')
 
 
163
 
164
+ completions = inspect.completions(df_long, 'completions')
165
 
166
  # Get completions with highest average rewards
167
  plot.leaderboard(
168
+ df_comp,
169
  ntop=completion_ntop,
170
+ group_on='completions',
171
+ agg_col='rewards',
172
  agg='mean',
173
  alias=True
174
  )
175
 
176
+ with st.expander(f'Show **{completion_src}** completion rewards data for **{n_runs} selected runs**'):
177
 
178
  st.markdown('#')
179
  st.subheader('Completion :violet[Rewards]')
 
182
  # completion_regex = st.text_input('Completion regex:', value='', key='completion_regex')
183
 
184
  plot.completion_rewards(
185
+ df_comp,
186
+ completion_col='completions',
187
+ reward_col='rewards',
188
+ uid_col='uids',
189
  ntop=completion_ntop,
190
  completions=completion_select,
191
  )
192
+ # TODO: show the UIDs which have used the selected completions
193
+
194
 
195
+ with st.expander(f'Show **{completion_src}** completion length data for **{n_runs} selected runs**'):
196
+
197
+ st.markdown('#')
198
+ st.subheader('Completion :violet[Length]')
199
+
200
+ completion_length_radio = st.radio('Use: ', ['characters','words','sentences'], key='completion_length_radio')
201
+
202
+ # Todo: use color to identify selected completions/ step names/ uids
203
+ plot.completion_length_time(
204
+ df_comp,
205
+ completion_col='completions',
206
+ uid_col='uids',
207
+ time_col='completion_times',
208
+ length_opt=completion_length_radio,
209
+ )
210
 
211
  ### Prompt-based scoring ###
212
  with tab4:
213
  # coming soon
214
  st.info('Prompt-based scoring coming soon')
215
+ st.snow()
216
 
217
  # st.dataframe(df_long_long.filter(regex=prompt_src).head())
218
 
meta_plotting.py ADDED
@@ -0,0 +1,48 @@
1
+ import numpy as np
2
+ import plotly.express as px
3
+
4
+ def plot_trace(df, col='emission', agg='mean', time_col='timestamp', ntop=10, hotkeys=None, hotkey_regex=None, abbrev=8, type='Miners'):
5
+
6
+ if hotkeys is not None:
7
+ df = df.loc[df.hotkey.isin(hotkeys)]
8
+ if hotkey_regex is not None:
9
+ df = df.loc[df.hotkey.str.contains(hotkey_regex)]
10
+
11
+ top_miners = df.groupby('hotkey')[col].agg(agg).sort_values(ascending=False)
12
+
13
+ stats = df.loc[df.hotkey.isin(top_miners.index[:ntop])].sort_values(by=time_col)
14
+
15
+ stats['hotkey_abbrev'] = stats.hotkey.str[:abbrev]
16
+ stats['coldkey_abbrev'] = stats.coldkey.str[:abbrev]
17
+ stats['rank'] = stats.hotkey.map({k:i for i,k in enumerate(top_miners.index, start=1)})
18
+
19
+ return px.line(stats.sort_values(by=[time_col,'rank']),
20
+ x=time_col, y=col, color='coldkey_abbrev', line_group='hotkey_abbrev',
21
+ hover_data=['hotkey','rank'],
22
+ labels={col:col.title(),'timestamp':'','coldkey_abbrev':f'Coldkey (first {abbrev} chars)','hotkey_abbrev':f'Hotkey (first {abbrev} chars)'},
23
+ title=f'Top {ntop} {type}, by {col.title()}',
24
+ template='plotly_white', width=800, height=600,
25
+ ).update_traces(opacity=0.7)
26
+
27
+
28
+ def plot_cabals(df, sel_col='coldkey', count_col='hotkey', time_col='timestamp', values=None, ntop=10, abbr=8):
29
+
30
+ if values is None:
31
+ values = df[sel_col].value_counts().sort_values(ascending=False).index[:ntop].tolist()
32
+ print(f'Automatically selected {sel_col!r} = {values!r}')
33
+
34
+ df = df.loc[df[sel_col].isin(values)]
35
+ rates = df.groupby([time_col,sel_col])[count_col].nunique().reset_index()
36
+ abbr_col = f'{sel_col} (first {abbr} chars)'
37
+ rates[abbr_col] = rates[sel_col].str[:abbr]
38
+ return px.line(rates.melt(id_vars=[time_col,sel_col,abbr_col]),
39
+ x=time_col, y='value', color=abbr_col,
40
+ #facet_col='variable', facet_col_wrap=1,
41
+ labels={'value':f'Number of Unique {count_col.title()}s per {sel_col.title()}','timestamp':''},
42
+ category_orders={abbr_col:[ v[:abbr] for v in values]},
43
+ # title=f'Unique {count_col.title()}s Associated with Selected {sel_col.title()}s in Metagraph',
44
+ title=f'Impact of Validators Update on Cabal',
45
+ width=800, height=600, template='plotly_white',
46
+ )
47
+
48
+
meta_utils.py ADDED
@@ -0,0 +1,59 @@
1
+ import os
2
+ import glob
3
+ import tqdm
4
+ import dill as pickle
5
+ import subprocess
6
+ import pandas as pd
7
+ import datetime
8
+ from functools import lru_cache
9
+
10
+ block_time_500k = datetime.datetime(2023, 5, 29, 5, 29, 0)
11
+ block_time_800k = datetime.datetime(2023, 7, 9, 21, 32, 48)
12
+ dt = (pd.Timestamp(block_time_800k)-pd.Timestamp(block_time_500k))/(800_000-500_000)
13
+
14
+ def run_subprocess(*args):
15
+ # Trigger the multigraph.py script to run and save metagraph snapshots
16
+ return subprocess.run('python multigraph.py'.split()+list(args),
17
+ shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
18
+
19
+ def load_metagraph(path, extra_cols=None, rm_cols=None):
20
+
21
+ with open(path, 'rb') as f:
22
+ metagraph = pickle.load(f)
23
+
24
+ df = pd.DataFrame(metagraph.axons)
25
+ df['block'] = metagraph.block.item()
26
+ df['timestamp'] = block_time_500k + dt*(df['block']-500_000)
27
+ df['difficulty'] = metagraph.difficulty
28
+ for c in extra_cols:
29
+ vals = getattr(metagraph,c)
30
+ df[c] = vals
31
+
32
+ return df.drop(columns=rm_cols)
33
+
34
+ @lru_cache(maxsize=16)
35
+ def load_metagraphs(block_start, block_end, block_step=1000, datadir='data/metagraph/1/', extra_cols=None):
36
+
37
+ if extra_cols is None:
38
+ extra_cols = ['total_stake','ranks','incentive','emission','consensus','trust','validator_trust','dividends']
39
+
40
+ blocks = range(block_start, block_end, block_step)
41
+ print(f'Loading blocks {blocks[0]}-{blocks[-1]} from {datadir}')
42
+ filenames = sorted(filename for filename in os.listdir(datadir) if int(filename.split('.')[0]) in blocks)
43
+ print(f'Found {len(filenames)} files in {datadir}')
44
+
45
+ metagraphs = []
46
+
47
+ pbar = tqdm.tqdm(filenames)
48
+ for filename in pbar:
49
+ pbar.set_description(f'Processing {filename}')
50
+
51
+ try:
52
+ metagraph = load_metagraph(os.path.join(datadir, filename), extra_cols=extra_cols, rm_cols=['protocol','placeholder1','placeholder2'])
53
+
54
+ metagraphs.append(metagraph)
55
+ except Exception as e:
56
+ print(f'filename {filename!r} generated an exception: { e }')
57
+
58
+ return pd.concat(metagraphs)
59
+
metadash.py ADDED
@@ -0,0 +1,114 @@
1
+ import os
2
+ import pandas as pd
3
+ import streamlit as st
4
+ from meta_utils import run_subprocess, load_metagraphs
5
+ # from opendashboards.assets import io, inspect, metric, plot
6
+ from meta_plotting import plot_trace, plot_cabals
7
+ import asyncio
8
+
9
+ ## TODO: Read blocks from a big parquet file instead of loading all the pickles -- this is slow
10
+
11
+ def get_or_create_eventloop():
12
+ try:
13
+ return asyncio.get_event_loop()
14
+ except RuntimeError as ex:
15
+ if "There is no current event loop in thread" in str(ex):
16
+ loop = asyncio.new_event_loop()
17
+ asyncio.set_event_loop(loop)
18
+ return asyncio.get_event_loop()
19
+
20
+ loop = asyncio.new_event_loop()
21
+ asyncio.set_event_loop(loop)
22
+ import bittensor
23
+
24
+ datadir='data/metagraph/1/'
25
+ blockfiles = sorted(int(filename.split('.')[0]) for filename in os.listdir(datadir))
26
+ DEFAULT_SRC = 'miner'
27
+ DEFAULT_BLOCK_START = blockfiles[0]
28
+ DEFAULT_BLOCK_END = blockfiles[-1]
29
+ DEFAULT_BLOCK_STEP = 1000
30
+ DEFAULT_NTOP = 10
31
+ DEFAULT_UID_NTOP = 10
32
+
33
+ # Set app config
34
+ st.set_page_config(
35
+ page_title='Validator Dashboard',
36
+ menu_items={
37
+ 'Report a bug': "https://github.com/opentensor/dashboards/issues",
38
+ 'About': """
39
+ This dashboard is part of the OpenTensor project. \n
40
+ """
41
+ },
42
+ layout = "centered"
43
+ )
44
+
45
+ st.title('Metagraph :red[Analysis] Dashboard :eyes:')
46
+ # add vertical space
47
+ st.markdown('#')
48
+ st.markdown('#')
49
+
50
+ subtensor = bittensor.subtensor(network='finney')
51
+ current_block = subtensor.get_current_block()
52
+ current_difficulty = subtensor.difficulty(1, block=current_block)
53
+
54
+ bcol1, bcol2, bcol3 = st.columns([0.2, 0.6, 0.2])
55
+ with bcol1:
56
+ st.metric('Current **block**', current_block, delta='+7200 [24hr]')
57
+ # st.metric('Current **difficulty**', f'{current_difficulty/10e12:.0}T', delta='?')
58
+
59
+
60
+ block_start, block_end = bcol2.select_slider(
61
+ 'Select a **block range**',
62
+ options=blockfiles,
63
+ value=(DEFAULT_BLOCK_START, DEFAULT_BLOCK_END),
64
+ format_func=lambda x: f'{x:,}'
65
+ )
66
+
67
+ bcol3.button('Refresh', on_click=run_subprocess)
68
+
69
+
70
+ with st.spinner(text=f'Loading data...'):
71
+ # df = load_metagraphs(block_start=block_start, block_end=block_end, block_step=DEFAULT_BLOCK_STEP)
72
+ df = pd.read_parquet('blocks_600100_807300_100')
73
+
74
+ blocks = df.block.unique()
75
+
76
+ df_sel = df.loc[df.block.between(block_start, block_end)]
77
+
78
+
79
+ # add vertical space
80
+ st.markdown('#')
81
+ st.markdown('#')
82
+
83
+ tab1, tab2, tab3, tab4 = st.tabs(["Overview", "Miners", "Validators", "Block"])
84
+
85
+ miner_choices = ['total_stake','ranks','incentive','emission','consensus','trust','validator_trust','dividends']
86
+ cabal_choices = ['hotkey','ip','coldkey']
87
+
88
+ ### Overview ###
89
+ with tab1:
90
+
91
+ x_col = st.radio('X-axis', ['block','timestamp'], index=0, horizontal=True)
92
+
93
+ acol1, acol2 = st.columns([0.3, 0.7])
94
+ sel_ntop = acol1.slider('Number:', min_value=1, max_value=50, value=10, key='sel_ntop')
95
+ #horizontal list
96
+ miner_choice = acol2.radio('Select:', miner_choices, horizontal=True, index=0)
97
+ st.plotly_chart(
98
+ plot_trace(df_sel, time_col=x_col,col=miner_choice, ntop=sel_ntop),
99
+ use_container_width=True
100
+ )
101
+
102
+ col1, col2 = st.columns(2)
103
+ count_col = col1.radio('Count', cabal_choices, index=0, horizontal=True)
104
+ y_col = col2.radio('Agg on', cabal_choices, index=2, horizontal=True)
105
+
106
+ st.plotly_chart(
107
+ plot_cabals(df_sel, time_col=x_col, count_col=count_col, sel_col=y_col, ntop=sel_ntop),
108
+ use_container_width=True
109
+ )
110
+
111
+ with tab2:
112
+
113
+ # plot of miner weights versus time/block
114
+ pass
multigraph.py ADDED
@@ -0,0 +1,112 @@
1
+ import os
2
+ import sys
3
+ import argparse
4
+ from traceback import print_exc
5
+ import pickle
6
+ import tqdm
7
+ import pandas as pd
8
+ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
9
+
10
+ import torch
11
+ import bittensor
12
+
13
+ #TODO: make line charts and other cool stuff for each metagraph snapshot
14
+
15
+ def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):
16
+
17
+ if subtensor is None:
18
+ subtensor = bittensor.subtensor(network='finney')
19
+
20
+ try:
21
+ metagraph = subtensor.metagraph(block=block, netuid=netuid, lite=lite)
22
+ if difficulty:
23
+ metagraph.difficulty = subtensor.difficulty(block=block, netuid=netuid)
24
+
25
+ if not lite:
26
+ if half:
27
+ metagraph.weights = torch.nn.Parameter(metagraph.weights.half(), requires_grad=False)
28
+ if prune_weights:
29
+ metagraph.weights = metagraph.weights[metagraph.weights.sum(axis=1) > 0]
30
+
31
+ with open(f'data/metagraph/{netuid}/{block}.pkl', 'wb') as f:
32
+ pickle.dump(metagraph, f)
33
+
34
+ return metagraph if return_graph else True
35
+
36
+ except Exception as e:
37
+ print(f'Error processing block {block}: {e}')
38
+
39
+
40
+ def parse_arguments():
41
+ parser = argparse.ArgumentParser(description='Process metagraphs for a given network.')
42
+ parser.add_argument('--netuid', type=int, default=1, help='Network UID to use.')
43
+ parser.add_argument('--difficulty', action='store_true', help='Include difficulty in metagraph.')
44
+ parser.add_argument('--prune_weights', action='store_true', help='Prune weights in metagraph.')
45
+ parser.add_argument('--return_graph', action='store_true', help='Return metagraph instead of True.')
46
+ parser.add_argument('--max_workers', type=int, default=32, help='Max workers to use.')
47
+ parser.add_argument('--start_block', type=int, default=1_000_000, help='Start block.')
48
+ parser.add_argument('--end_block', type=int, default=600_000, help='End block.')
49
+ parser.add_argument('--step_size', type=int, default=100, help='Step size.')
50
+ return parser.parse_args()
51
+
52
+ if __name__ == '__main__':
53
+
54
+ subtensor = bittensor.subtensor(network='finney')
55
+ print(f'Current block: {subtensor.block}')
56
+
57
+ args = parse_arguments()
58
+
59
+ netuid=args.netuid
60
+ difficulty=args.difficulty
61
+ overwrite=False
62
+ return_graph=args.return_graph
63
+
64
+ step_size = args.step_size
65
+ start_block = args.start_block
66
+ start_block = (min(subtensor.block, start_block)//step_size)*step_size # round to nearest step_size
67
+ end_block = args.end_block
68
+ blocks = range(start_block, end_block, -step_size)
69
+
70
+ # only get weights for multiple of 500 blocks
71
+ lite=lambda x: x%500!=0
72
+
73
+ max_workers = min(args.max_workers, len(blocks))
74
+
75
+ os.makedirs(f'data/metagraph/{netuid}', exist_ok=True)
76
+ if not overwrite:
77
+ blocks = [block for block in blocks if not os.path.exists(f'data/metagraph/{netuid}/{block}.pkl')]
78
+
79
+ metagraphs = []
80
+
81
+ if len(blocks)==0:
82
+ print(f'No blocks to process. Current block: {subtensor.block}')
83
+ quit()
84
+
85
+ print(f'Processing {len(blocks)} blocks from {blocks[0]}-{blocks[-1]} using {max_workers} workers.')
86
+
87
+ with ProcessPoolExecutor(max_workers=max_workers) as executor:
88
+ futures = [
89
+ executor.submit(process, block, lite=lite(block), netuid=netuid, difficulty=difficulty)
90
+ for block in blocks
91
+ ]
92
+
93
+ success = 0
94
+ with tqdm.tqdm(total=len(futures)) as pbar:
95
+ for block, future in zip(blocks,futures):
96
+ try:
97
+ metagraphs.append(future.result())
98
+ success += 1
99
+ except Exception as e:
100
+ print(f'generated an exception: {print_exc(e)}')
101
+ pbar.update(1)
102
+ pbar.set_description(f'Processed {success} blocks. Current block: {block}')
103
+
104
+ if not success:
105
+ raise ValueError('No blocks were successfully processed.')
106
+
107
+ print(f'Processed {success} blocks.')
108
+ if return_graph:
109
+ for metagraph in metagraphs:
110
+ print(f'{metagraph.block}: {metagraph.n.item()} nodes, difficulty={getattr(metagraph, "difficulty", None)}, weights={metagraph.weights.shape if hasattr(metagraph, "weights") else None}')
111
+
112
+ print(metagraphs[-1])
multistats.py ADDED
@@ -0,0 +1,348 @@
1
+ import os
2
+ import re
3
+ import argparse
4
+ import tqdm
5
+ import wandb
6
+ import traceback
7
+ import plotly.express as px
8
+ import pandas as pd
9
+ from concurrent.futures import ProcessPoolExecutor
10
+
11
+ import opendashboards.utils.utils as utils
12
+ import opendashboards.utils.aggregate as aggregate
13
+
14
+ from IPython.display import display
15
+
16
+ api= wandb.Api(timeout=60)
17
+ wandb.login(anonymous="allow")
18
+
19
+ def pull_wandb_runs(project='openvalidators', filters=None, min_steps=50, max_steps=100_000, ntop=10, netuid=None, summary_filters=None ):
20
+ # TODO: speed this up by storing older runs
21
+
22
+ all_runs = api.runs(project, filters=filters)
23
+ print(f'Using {ntop}/{len(all_runs)} runs with more than {min_steps} events')
24
+ pbar = tqdm.tqdm(all_runs)
25
+ runs = []
26
+ n_events = 0
27
+ successful = 0
28
+ for i, run in enumerate(pbar):
29
+
30
+ summary = run.summary
31
+ if summary_filters is not None and not summary_filters(summary):
32
+ continue
33
+ if netuid is not None and run.config.get('netuid') != netuid:
34
+ continue
35
+ step = summary.get('_step',0)
36
+ if step < min_steps or step > max_steps:
37
+ # warnings.warn(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
38
+ continue
39
+
40
+ prog_msg = f'Loading data {successful/ntop*100:.0f}% ({successful}/{ntop} runs, {n_events} events)'
41
+ pbar.set_description(f'{prog_msg}... **fetching** `{run.name}`')
42
+
43
+ duration = summary.get('_runtime')
44
+ end_time = summary.get('_timestamp')
45
+ # extract values for selected tags
46
+ rules = {'hotkey': re.compile('^[0-9a-z]{48}$',re.IGNORECASE), 'version': re.compile('^\\d\.\\d+\.\\d+$'), 'spec_version': re.compile('\\d{4}$')}
47
+ tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
48
+ # include bool flag for remaining tags
49
+ tags.update({k: True for k in run.tags if k not in tags.keys() and k not in tags.values()})
50
+
51
+ runs.append({
52
+ 'state': run.state,
53
+ 'num_steps': step,
54
+ 'num_completions': step*sum(len(v) for k, v in run.summary.items() if k.endswith('completions') and isinstance(v, list)),
55
+ 'entity': run.entity,
56
+ 'user': run.user.name,
57
+ 'username': run.user.username,
58
+ 'run_id': run.id,
59
+ 'run_name': run.name,
60
+ 'project': run.project,
61
+ 'run_url': run.url,
62
+ 'run_path': os.path.join(run.entity, run.project, run.id),
63
+ 'start_time': pd.to_datetime(end_time-duration, unit="s"),
64
+ 'end_time': pd.to_datetime(end_time, unit="s"),
65
+ 'duration': pd.to_timedelta(duration, unit="s").round('s'),
66
+ 'netuid': run.config.get('netuid'),
67
+ **tags
68
+ })
69
+ n_events += step
70
+ successful += 1
71
+ if successful >= ntop:
72
+ break
73
+
74
+ return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})
75
+
76
+ def plot_gantt(df_runs):
77
+ fig = px.timeline(df_runs,
78
+ x_start="start_time", x_end="end_time", y="username", color="state",
79
+ title="Timeline of Runs",
80
+ category_orders={'run_name': df_runs.run_name.unique()},#,'username': sorted(df_runs.username.unique())},
81
+ hover_name="run_name",
82
+ hover_data=['hotkey','user','username','run_id','num_steps','num_completions'],
83
+ color_discrete_map={'running': 'green', 'finished': 'grey', 'killed':'blue', 'crashed':'orange', 'failed': 'red'},
84
+ opacity=0.3,
85
+ width=1200,
86
+ height=800,
87
+ template="plotly_white",
88
+ )
89
+ fig.update_yaxes(tickfont_size=8, title='')
90
+ fig.show()
91
+
92
+
93
+ def clean_data(df):
94
+ return df.dropna(subset=df.filter(regex='completions|rewards').columns, how='any').dropna(axis=1, how='all')
95
+
96
+ def explode_data(df):
97
+ list_cols = utils.get_list_col_lengths(df)
98
+ return utils.explode_data(df, list(list_cols.keys())).apply(pd.to_numeric, errors='ignore')
99
+
100
+
101
+ def load_data(run_id, run_path=None, load=True, save=False, explode=True):
102
+
103
+ file_path = os.path.join('data/runs/',f'history-{run_id}.parquet')
104
+
105
+ if load and os.path.exists(file_path):
106
+ df = pd.read_parquet(file_path)
107
+ # filter out events with missing step length
108
+ df = df.loc[df.step_length.notna()]
109
+
110
+ # detect list columns which as stored as strings
111
+ ignore_cols = ('moving_averaged_scores')
112
+ list_cols = [c for c in df.columns if c not in ignore_cols and df[c].dtype == "object" and df[c].str.startswith("[").all()]
113
+ # convert string representation of list to list
114
+ # df[list_cols] = df[list_cols].apply(lambda x: eval(x, {'__builtins__': None}) if pd.notna(x) else x)
115
+ try:
116
+ df[list_cols] = df[list_cols].fillna('').applymap(eval, na_action='ignore')
117
+ except ValueError as e:
118
+ print(f'Error loading {file_path!r} when converting columns {list_cols} to list: {e}', flush=True)
119
+
120
+ else:
121
+ # Download the history from wandb and add metadata
122
+ run = api.run(run_path)
123
+ df = pd.DataFrame(list(run.scan_history()))
124
+
125
+ # Remove rows with missing completions or rewards, which will be stuff related to weights
126
+ df.dropna(subset=df.filter(regex='completions|rewards').columns, how='any', inplace=True)
127
+
128
+ print(f'Downloaded {df.shape[0]} events from {run_path!r} with id {run_id!r}')
129
+
130
+ # Clean and explode dataframe
131
+ # overwrite object to free memory
132
+ float_cols = df.filter(regex='reward').columns
133
+ df = explode_data(clean_data(df)).astype({c: float for c in float_cols}).fillna({c: 0 for c in float_cols})
134
+
135
+ if save:
136
+ df.to_parquet(file_path, index=False)
137
+
138
+ # Convert timestamp to datetime.
139
+ df._timestamp = pd.to_datetime(df._timestamp, unit="s")
140
+ return df.sort_values("_timestamp")
141
+
142
+
143
+ def calculate_stats(df_long, freq='H', save_path=None, ntop=3 ):
144
+
145
+ df_long._timestamp = pd.to_datetime(df_long._timestamp)
146
+
147
+ # if dataframe has columns such as followup_completions and answer_completions, convert to multiple rows
148
+ if 'completions' not in df_long.columns:
149
+ df_long.set_index(['_timestamp','run_id'], inplace=True)
150
+ df_schema = pd.concat([
151
+ df_long[['followup_completions','followup_rewards']].rename(columns={'followup_completions':'completions', 'followup_rewards':'rewards'}),
152
+ df_long[['answer_completions','answer_rewards']].rename(columns={'answer_completions':'completions', 'answer_rewards':'rewards'})
153
+ ])
154
+ df_long = df_schema.reset_index()
155
+
156
+ run_id = df_long['run_id'].iloc[0]
157
+ # print(f'Calculating stats for run {run_id!r} dataframe with shape {df_long.shape}')
158
+
159
+ # Approximate number of tokens in each completion
160
+ df_long['completion_num_tokens'] = (df_long['completions'].astype(str).str.split().str.len() / 0.75).round()
161
+
162
+ # TODO: use named aggregations
163
+ reward_aggs = ['sum','mean','std','median','max',aggregate.nonzero_rate, aggregate.nonzero_mean, aggregate.nonzero_std, aggregate.nonzero_median]
164
+ aggs = {
165
+ 'completions': ['nunique','count', aggregate.diversity, aggregate.successful_diversity, aggregate.success_rate],
166
+ 'completion_num_tokens': ['mean', 'std', 'median', 'max'],
167
+ **{k: reward_aggs for k in df_long.filter(regex='reward') if df_long[k].nunique() > 1}
168
+ }
169
+
170
+ # Calculate tokens per second
171
+ if 'completion_times' in df_long.columns:
172
+ df_long['tokens_per_sec'] = df_long['completion_num_tokens']/(df_long['completion_times']+1e-6)
173
+ aggs.update({
174
+ 'completion_times': ['mean','std','median','min','max'],
175
+ 'tokens_per_sec': ['mean','std','median','max'],
176
+ })
177
+
178
+ grouper = df_long.groupby(pd.Grouper(key='_timestamp', axis=0, freq=freq))
179
+ # carry out main aggregations
180
+ stats = grouper.agg(aggs)
181
+ # carry out multi-column aggregations using apply
182
+ diversity = grouper.apply(aggregate.successful_nonzero_diversity)
183
+ # carry out top completions aggregations using apply
184
+ top_completions = grouper.apply(aggregate.completion_top_stats, exclude='', ntop=ntop).unstack()
185
+
186
+ # combine all aggregations, which have the same index
187
+ stats = pd.concat([stats, diversity, top_completions], axis=1)
188
+
189
+ # flatten multiindex columns
190
+ stats.columns = ['_'.join([str(cc) for cc in c]) if isinstance(c, tuple) else str(c) for c in stats.columns]
191
+ stats = stats.reset_index().assign(run_id=run_id)
192
+
193
+ if save_path:
194
+ stats.to_csv(save_path, index=False)
195
+
196
+ return stats
197
+
198
+
199
+
200
+ def process(run, load=True, save=False, load_stats=True, freq='H', ntop=3):
201
+
202
+ try:
203
+
204
+ stats_path = f'data/aggs/stats-{run["run_id"]}.csv'
205
+ if load_stats and os.path.exists(stats_path):
206
+ print(f'Loaded stats file {stats_path!r}')
207
+ return pd.read_csv(stats_path)
208
+
209
+ # Load data and add extra columns from wandb run
210
+ df_long = load_data(run_id=run['run_id'],
211
+ run_path=run['run_path'],
212
+ load=load,
213
+ save=save,
214
+ # save = (run['state'] != 'running') & run['end_time']
215
+ ).assign(**run.to_dict())
216
+ assert isinstance(df_long, pd.DataFrame), f'Expected dataframe, but got {type(df_long)}'
217
+
218
+ # Get and save stats
219
+ return calculate_stats(df_long, freq=freq, save_path=stats_path, ntop=ntop)
220
+
221
+ except Exception as e:
222
+ print(f'Error processing run {run["run_id"]!r}:\t{e.__class__.__name__}: {e}',flush=True)
223
+ print(traceback.format_exc())
224
+
225
+ def line_chart(df, col, title=None):
226
+ title = title or col.replace('_',' ').title()
227
+ fig = px.line(df.astype({'_timestamp':str}),
228
+ x='_timestamp', y=col,
229
+ line_group='run_id',
230
+ title=f'{title} over Time',
231
+ labels={'_timestamp':'', col: title, 'uids':'UID','value':'counts', 'variable':'Completions'},
232
+ width=800, height=600,
233
+ template='plotly_white',
234
+ ).update_traces(opacity=0.2)
235
+
236
+ fig.write_image(f'data/figures/{col}.png')
237
+ fig.write_html(f'data/figures/{col}.html')
238
+ return col
239
+
240
+
241
+ def parse_arguments():
242
+ parser = argparse.ArgumentParser(description='Process wandb validator runs for a given netuid.')
243
+ parser.add_argument('--load_runs',action='store_true', help='Load runs from file.')
244
+ parser.add_argument('--repull_unfinished',action='store_true', help='Re-pull runs that were running when downloaded and saved.')
245
+ parser.add_argument('--netuid', type=int, default=None, help='Network UID to use.')
246
+ parser.add_argument('--ntop', type=int, default=1000, help='Number of runs to process.')
247
+ parser.add_argument('--min_steps', type=int, default=100, help='Minimum number of steps to include.')
248
+ parser.add_argument('--max_workers', type=int, default=32, help='Max workers to use.')
249
+ parser.add_argument('--no_plot',action='store_true', help='Prevent plotting.')
250
+ parser.add_argument('--no_save',action='store_true', help='Prevent saving data to file.')
251
+ parser.add_argument('--no_load',action='store_true', help='Prevent loading downloaded data from file.')
252
+ parser.add_argument('--no_load_stats',action='store_true', help='Prevent loading stats data from file.')
253
+ parser.add_argument('--freq', type=str, default='H', help='Frequency to aggregate data.')
254
+ parser.add_argument('--completions_ntop', type=int, default=3, help='Number of top completions to include in stats.')
255
+
256
+ return parser.parse_args()
257
+
258
+
259
+ if __name__ == '__main__':
260
+
261
+ # TODO: flag to overwrite runs that were running when downloaded and saved: check if file date is older than run end time.
262
+
263
+ args = parse_arguments()
264
+ print(args)
265
+
266
+ filters = None# {"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
267
+ # filters={'tags': {'$in': ['5F4tQyWrhfGVcNhoqeiNsR6KjD4wMZ2kfhLj4oHYuyHbZAc3']}} # Is foundation validator
268
+ if args.load_runs and os.path.exists('data/wandb.csv'):
269
+ df_runs = pd.read_csv('data/wandb.csv')
270
+ assert len(df_runs) >= args.ntop, f'Loaded {len(df_runs)} runs, but expected at least {args.ntop}'
271
+ df_runs = df_runs.iloc[:args.ntop]
272
+ else:
273
+ df_runs = pull_wandb_runs(ntop=args.ntop,
274
+ min_steps=args.min_steps,
275
+ netuid=args.netuid,
276
+ filters=filters
277
+ )#summary_filters=lambda s: s.get('augment_prompt'))
278
+ df_runs.to_csv('data/wandb.csv', index=False)
279
+
280
+
281
+ os.makedirs('data/runs/', exist_ok=True)
282
+ os.makedirs('data/aggs/', exist_ok=True)
283
+ os.makedirs('data/figures/', exist_ok=True)
284
+
285
+ display(df_runs)
286
+ if not args.no_plot:
287
+ plot_gantt(df_runs)
288
+
289
+ with ProcessPoolExecutor(max_workers=min(args.max_workers, df_runs.shape[0])) as executor:
290
+ futures = [executor.submit(
291
+ process,
292
+ run,
293
+ load=not args.no_load,
294
+ save=not args.no_save,
295
+ load_stats=not args.no_load_stats,
296
+ freq=args.freq,
297
+ ntop=args.completions_ntop
298
+ )
299
+ for _, run in df_runs.iterrows()
300
+ ]
301
+
302
+ # Use tqdm to add a progress bar
303
+ results = []
304
+ with tqdm.tqdm(total=len(futures)) as pbar:
305
+ for future in futures:
306
+ try:
307
+ result = future.result()
308
+ results.append(result)
309
+ except Exception as e:
310
+ print(f'-----------------------------\nWorker generated an exception in "process" function:\n{e.__class__.__name__}: {e}\n-----------------------------\n',flush=True)
311
+ pbar.update(1)
312
+
313
+ if not results:
314
+ raise ValueError('No runs were successfully processed.')
315
+ print(f'Processed {len(results)} runs.',flush=True)
316
+
317
+ # Concatenate the results into a single dataframe
318
+ df = pd.concat(results, ignore_index=True).sort_values(['_timestamp','run_id'], ignore_index=True)
319
+
320
+ df.to_csv('data/processed.csv', index=False)
321
+ print(f'Saved {df.shape[0]} rows to data/processed.csv')
322
+
323
+ display(df)
324
+ print(f'Unique values in columns:')
325
+ display(df.nunique().sort_values())
326
+ if not args.no_plot:
327
+
328
+ plots = []
329
+
330
+ cols = df.set_index(['run_id','_timestamp']).columns
331
+ with ProcessPoolExecutor(max_workers=min(args.max_workers, len(cols))) as executor:
332
+ futures = [executor.submit(line_chart, df, c) for c in cols]
333
+
334
+ # Use tqdm to add a progress bar
335
+ results = []
336
+ with tqdm.tqdm(total=len(futures)) as pbar:
337
+ for future in futures:
338
+ try:
339
+ result = future.result()
340
+ plots.append(result)
341
+ except Exception as e:
342
+ print(f'-----------------------------\nWorker generated an exception in "line_chart" function:\n{e.__class__.__name__}: {e}\n-----------------------------\n',flush=True)
343
+ # traceback.print_exc()
344
+ pbar.update(1)
345
+
346
+ print(f'Saved {len(plots)} plots to data/figures/')
347
+
348
+
opendashboards/assets/inspect.py CHANGED
@@ -3,6 +3,9 @@ import streamlit as st
3
  import pandas as pd
4
  import opendashboards.utils.utils as utils
5
 
6
  @st.cache_data
7
  def explode_data(df):
8
  list_cols = utils.get_list_col_lengths(df)
@@ -19,19 +22,9 @@ def explode_data(df):
19
  def completions(df_long, col):
20
  return df_long[col].value_counts()
21
 
22
- @st.cache_data
23
- def weights(df, index='_timestamp'):
24
- # Create a column for each UID and show most recent rows
25
- scores = df['moving_averaged_scores'].apply(pd.Series).fillna(method='ffill')
26
- if index in df.columns:
27
- scores.index = df[index]
28
-
29
- # rename columns
30
- scores.rename({i: f'UID-{i}' for i in range(scores.shape[1])}, axis=1, inplace=True)
31
- return scores
32
-
33
  def run_event_data(df_runs, df, selected_runs):
34
-
35
  st.markdown('#')
36
 
37
  show_col1, show_col2 = st.columns(2)
@@ -51,4 +44,7 @@ def run_event_data(df_runs, df, selected_runs):
51
  column_config={
52
  "url": st.column_config.LinkColumn("URL"),
53
  }
54
- )
3
  import pandas as pd
4
  import opendashboards.utils.utils as utils
5
 
6
+ def clean_data(df):
7
+ return df.dropna(subset=df.filter(regex='completions|rewards').columns, how='any')
8
+
9
  @st.cache_data
10
  def explode_data(df):
11
  list_cols = utils.get_list_col_lengths(df)
 
22
  def completions(df_long, col):
23
  return df_long[col].value_counts()
24
 
25
+
26
  def run_event_data(df_runs, df, selected_runs):
27
+
28
  st.markdown('#')
29
 
30
  show_col1, show_col2 = st.columns(2)
 
44
  column_config={
45
  "url": st.column_config.LinkColumn("URL"),
46
  }
47
+ )
48
+
49
+ def highlight_row(row, expr, color='lightgrey', bg_color='white'):
50
+ return [f'background-color:{color}' if expr else f'background-color:{bg_color}'] * len(row)
opendashboards/assets/io.py CHANGED
@@ -5,19 +5,36 @@ import streamlit as st
5
 
6
  import opendashboards.utils.utils as utils
7
 
8
 
9
  @st.cache_data
10
  def load_runs(project, filters, min_steps=10):
11
  runs = []
12
  msg = st.empty()
13
- for run in utils.get_runs(project, filters, api_key=st.secrets['WANDB_API_KEY']):
14
- step = run.summary.get('_step',0)
15
  if step < min_steps:
16
  msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
17
  continue
18
-
19
- duration = run.summary.get('_runtime')
20
- end_time = run.summary.get('_timestamp')
 
  # extract values for selected tags
22
  rules = {'hotkey': re.compile('^[0-9a-z]{48}$',re.IGNORECASE), 'version': re.compile('^\\d\.\\d+\.\\d+$'), 'spec_version': re.compile('\\d{4}$')}
23
  tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
@@ -27,17 +44,22 @@ def load_runs(project, filters, min_steps=10):
27
  runs.append({
28
  'state': run.state,
29
  'num_steps': step,
 
30
  'entity': run.entity,
31
- 'id': run.id,
32
- 'name': run.name,
33
  'project': run.project,
34
  'url': run.url,
35
- 'path': os.path.join(run.entity, run.project, run.id),
36
  'start_time': pd.to_datetime(end_time-duration, unit="s"),
37
  'end_time': pd.to_datetime(end_time, unit="s"),
38
- 'duration': pd.to_datetime(duration, unit="s"),
39
  **tags
40
  })
41
  msg.empty()
42
  return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})
43
 
@@ -56,7 +78,7 @@ def load_data(selected_runs, load=True, save=False):
56
  run = selected_runs.loc[idx]
57
  prog_msg = f'Loading data {i/len(selected_runs)*100:.0f}% ({successful}/{len(selected_runs)} runs, {n_events} events)'
58
 
59
- file_path = os.path.join('data',f'history-{run.id}.csv')
60
 
61
  if load and os.path.exists(file_path):
62
  progress.progress(i/len(selected_runs),f'{prog_msg}... **reading** `{file_path}`')
@@ -67,18 +89,19 @@ def load_data(selected_runs, load=True, save=False):
67
  st.exception(e)
68
  continue
69
  else:
70
- progress.progress(i/len(selected_runs),f'{prog_msg}... **downloading** `{run.path}`')
71
  try:
72
- # Download the history from wandb
73
- df = utils.download_data(run.path)
74
- # Add metadata to the dataframe
75
- df.assign(**run.to_dict())
76
 
77
  if save and run.state != 'running':
78
  df.to_csv(file_path, index=False)
79
  # st.info(f'Saved history to {file_path}')
80
  except Exception as e:
81
- info.warning(f'Failed to download history for `{run.path}`')
82
  st.exception(e)
83
  continue
84
 
@@ -94,3 +117,92 @@ def load_data(selected_runs, load=True, save=False):
94
  return pd.concat(frames)
95
 
96
 
5
 
6
  import opendashboards.utils.utils as utils
7
 
8
+ from pandas.api.types import (
9
+ is_categorical_dtype,
10
+ is_datetime64_any_dtype,
11
+ is_numeric_dtype,
12
+ is_object_dtype,
13
+ )
14
+
15
 
16
  @st.cache_data
17
  def load_runs(project, filters, min_steps=10):
18
  runs = []
19
+ n_events = 0
20
+ successful = 0
21
+ progress = st.progress(0, 'Fetching runs from wandb')
22
  msg = st.empty()
23
+
24
+ all_runs = utils.get_runs(project, filters, api_key=st.secrets['WANDB_API_KEY'])
25
+ for i, run in enumerate(all_runs):
26
+
27
+ summary = run.summary
28
+ step = summary.get('_step',-1) + 1
29
  if step < min_steps:
30
  msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
31
  continue
32
+
33
+ prog_msg = f'Loading data {i/len(all_runs)*100:.0f}% ({successful}/{len(all_runs)} runs, {n_events} events)'
34
+ progress.progress(i/len(all_runs),f'{prog_msg}... **fetching** `{run.name}`')
35
+
36
+ duration = summary.get('_runtime')
37
+ end_time = summary.get('_timestamp')
38
  # extract values for selected tags
39
  rules = {'hotkey': re.compile('^[0-9a-z]{48}$',re.IGNORECASE), 'version': re.compile('^\\d\.\\d+\.\\d+$'), 'spec_version': re.compile('\\d{4}$')}
40
  tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
 
44
  runs.append({
45
  'state': run.state,
46
  'num_steps': step,
47
+ 'num_completions': step*sum(len(v) for k, v in run.summary.items() if k.endswith('completions') and isinstance(v, list)),
48
  'entity': run.entity,
49
+ 'run_id': run.id,
50
+ 'run_name': run.name,
51
  'project': run.project,
52
  'url': run.url,
53
+ 'run_path': os.path.join(run.entity, run.project, run.id),
54
  'start_time': pd.to_datetime(end_time-duration, unit="s"),
55
  'end_time': pd.to_datetime(end_time, unit="s"),
56
+ 'duration': pd.to_timedelta(duration, unit="s").round('s'),
57
  **tags
58
  })
59
+ n_events += step
60
+ successful += 1
61
+
62
+ progress.empty()
63
  msg.empty()
64
  return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})
65
 
 
78
  run = selected_runs.loc[idx]
79
  prog_msg = f'Loading data {i/len(selected_runs)*100:.0f}% ({successful}/{len(selected_runs)} runs, {n_events} events)'
80
 
81
+ file_path = os.path.join('data',f'history-{run.run_id}.csv')
82
 
83
  if load and os.path.exists(file_path):
84
  progress.progress(i/len(selected_runs),f'{prog_msg}... **reading** `{file_path}`')
 
89
  st.exception(e)
90
  continue
91
  else:
92
+ progress.progress(i/len(selected_runs),f'{prog_msg}... **downloading** `{run.run_path}`')
93
  try:
94
+ # Download the history from wandb and add metadata
95
+ df = utils.download_data(run.run_path).assign(**run.to_dict())
 
 
96
 
97
+ print(f'Downloaded {df.shape[0]} events from `{run.run_path}`. Columns: {df.columns}')
98
+ df.info()
99
+
100
  if save and run.state != 'running':
101
  df.to_csv(file_path, index=False)
102
  # st.info(f'Saved history to {file_path}')
103
  except Exception as e:
104
+ info.warning(f'Failed to download history for `{run.run_path}`')
105
  st.exception(e)
106
  continue
107
 
 
117
  return pd.concat(frames)
118
 
119
 
120
+ def filter_dataframe(df: pd.DataFrame, demo_selection=None) -> pd.DataFrame:
121
+ """
122
+ Adds a UI on top of a dataframe to let viewers filter columns
123
+
124
+ Args:
125
+ df (pd.DataFrame): Original dataframe
126
+ demo_selection (pd.Index): Index of runs to select (if demo)
127
+
128
+ Returns:
129
+ pd.DataFrame: Filtered dataframe
130
+ """
131
+ filter_mode = st.sidebar.radio("Filter mode", ("Use demo", "Add filters"), index=0)
132
+
133
+ run_msg = st.info("Select a single wandb run or compare multiple runs")
134
+
135
+ if filter_mode == "Use demo":
136
+ df = df.loc[demo_selection]
137
+ run_msg.info(f"Selected {len(df)} runs")
138
+ return df
139
+
140
+ df = df.copy()
141
+
142
+ # Try to convert datetimes into a standard format (datetime, no timezone)
143
+ for col in df.columns:
144
+ if is_object_dtype(df[col]):
145
+ try:
146
+ df[col] = pd.to_datetime(df[col])
147
+ except Exception:
148
+ pass
149
+
150
+ if is_datetime64_any_dtype(df[col]):
151
+ df[col] = df[col].dt.tz_localize(None)
152
+
153
+ modification_container = st.container()
154
+
155
+ with modification_container:
156
+ to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
157
+ for column in to_filter_columns:
158
+ left, right = st.columns((1, 20))
159
+ # Treat columns with < 10 unique values as categorical
160
+ if is_categorical_dtype(df[column]) or df[column].nunique() < 10:
161
+ user_cat_input = right.multiselect(
162
+ f"Values for {column}",
163
+ df[column].unique(),
164
+ default=list(df[column].unique()),
165
+ )
166
+ df = df[df[column].isin(user_cat_input)]
167
+ elif is_numeric_dtype(df[column]):
168
+ _min = float(df[column].min())
169
+ _max = float(df[column].max())
170
+ step = (_max - _min) / 100
171
+ user_num_input = right.slider(
172
+ f"Values for {column}",
173
+ min_value=_min,
174
+ max_value=_max,
175
+ value=(_min, _max),
176
+ step=step,
177
+ )
178
+ df = df[df[column].between(*user_num_input)]
179
+ elif is_datetime64_any_dtype(df[column]):
180
+ user_date_input = right.date_input(
181
+ f"Values for {column}",
182
+ value=(
183
+ df[column].min(),
184
+ df[column].max(),
185
+ ),
186
+ )
187
+ if len(user_date_input) == 2:
188
+ user_date_input = tuple(map(pd.to_datetime, user_date_input))
189
+ start_date, end_date = user_date_input
190
+ df = df.loc[df[column].between(start_date, end_date)]
191
+ else:
192
+ user_text_input = right.text_input(
193
+ f"Substring or regex in {column}",
194
+ )
195
+ if user_text_input:
196
+ df = df[df[column].astype(str).str.contains(user_text_input)]
197
+
198
+
199
+ # Load data if new runs selected
200
+ if len(df):
201
+ run_msg.info(f"Selected {len(df)} runs")
202
+ else:
203
+ # open a dialog to select runs
204
+ run_msg.error("Please select at least one run")
205
+ # st.snow()
206
+ # st.stop()
207
+
208
+ return df
opendashboards/assets/metric.py CHANGED
@@ -2,6 +2,18 @@ import time
2
  import pandas as pd
3
  import streamlit as st
4
 
5
 
6
  @st.cache_data
7
  def wandb(df_runs):
@@ -9,50 +21,72 @@ def wandb(df_runs):
9
  # get rows where start time is older than 24h ago
10
  df_runs_old = df_runs.loc[df_runs.start_time < pd.to_datetime(time.time()-24*60*60, unit='s')]
11
 
12
- col1, col2, col3 = st.columns(3)
13
 
14
- col1.metric('Runs', df_runs.shape[0], delta=f'{df_runs.shape[0]-df_runs_old.shape[0]} (24h)')
15
- col2.metric('Hotkeys', df_runs.hotkey.nunique(), delta=f'{df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique()} (24h)')
16
- col3.metric('Events', df_runs.num_steps.sum(), delta=f'{df_runs.num_steps.sum()-df_runs_old.num_steps.sum()} (24h)')
17
  st.markdown('----')
18
 
19
 
20
  @st.cache_data
21
- def runs(df, df_long, selected_runs):
22
-
23
- col1, col2, col3 = st.columns(3)
24
- col1.metric(label="Runs", value=len(selected_runs))
25
- col1.metric(label="Events", value=df.shape[0]) #
26
- col2.metric(label="Followup UIDs", value=df_long.followup_uids.nunique())
27
- col2.metric(label="Answer UIDs", value=df_long.answer_uids.nunique())
28
- col3.metric(label="Followup Completions", value=df_long.followup_completions.nunique())
29
- col3.metric(label="Answer Completions", value=df_long.answer_completions.nunique())
30
  st.markdown('----')
31
 
32
 
33
-
34
  @st.cache_data
35
- def uids(df_long, src, uid=None):
36
 
37
- uid_col = f'{src}_uids'
38
- completion_col = f'{src}_completions'
39
  nsfw_col = f'{src}_nsfw_scores'
40
- reward_col = f'{src}_rewards'
41
 
42
- if uid is not None:
43
- df_long = df_long.loc[df_long[uid_col] == uid]
44
 
45
- col1, col2, col3 = st.columns(3)
46
  col1.metric(
47
  label="Success %",
48
- value=f'{df_long.loc[df_long[completion_col].str.len() > 0].shape[0]/df_long.shape[0] * 100:.1f}'
 
49
  )
50
  col2.metric(
51
  label="Diversity %",
52
- value=f'{df_long[completion_col].nunique()/df_long.shape[0] * 100:.1f}'
 
53
  )
54
  col3.metric(
55
  label="Toxicity %",
56
- value=f'{df_long[nsfw_col].mean() * 100:.1f}' if nsfw_col in df_long.columns else 'N/A'
 
57
  )
58
  st.markdown('----')
 
2
  import pandas as pd
3
  import streamlit as st
4
 
5
+ def fmt(number):
6
+ units = ['', 'k', 'M', 'B']
7
+ magnitude = 0
8
+ while abs(number) >= 1000 and magnitude < len(units) - 1:
9
+ magnitude += 1
10
+ number /= 1000
11
+
12
+ if units[magnitude]:
13
+ return f'{number:.2f}{units[magnitude]}'
14
+ else:
15
+ return f'{number:.0f}{units[magnitude]}'
16
+
17
 
18
  @st.cache_data
19
  def wandb(df_runs):
 
21
  # get rows where start time is older than 24h ago
22
  df_runs_old = df_runs.loc[df_runs.start_time < pd.to_datetime(time.time()-24*60*60, unit='s')]
23
 
24
+ col1, col2, col3, col4 = st.columns(4)
25
 
26
+ # Convert to appropriate units e.g. 1.2k instead of 1200.
27
+ col1.metric('Runs', fmt(df_runs.shape[0]), delta=fmt(df_runs.shape[0]-df_runs_old.shape[0])+' (24h)')
28
+ col2.metric('Hotkeys', fmt(df_runs.hotkey.nunique()), delta=fmt(df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique())+' (24h)')
29
+ col3.metric('Events', fmt(df_runs.num_steps.sum()), delta=fmt(df_runs.num_steps.sum()-df_runs_old.num_steps.sum())+' (24h)')
30
+ col4.metric('Completions', fmt(df_runs.num_completions.sum()), delta=fmt(df_runs.num_completions.sum()-df_runs_old.num_completions.sum())+' (24h)')
31
+
32
  st.markdown('----')
33
 
34
 
35
  @st.cache_data
36
+ def runs(df_long):
37
+
38
+ col1, col2, col3, col4 = st.columns(4)
39
+ print(df_long.columns)
40
+
41
+ # Convert to appropriate units e.g. 1.2k instead of 1200.c
42
+ col1.metric('Runs', fmt(df_long.run_id.nunique()))
43
+ col2.metric('Hotkeys', fmt(df_long.hotkey.nunique()))
44
+ col3.metric('Events', fmt(df_long.groupby(['run_id','_step']).ngroups))
45
+ col4.metric('Completions', fmt(df_long.shape[0]))
46
+
47
+ name_type = df_long.name.apply(lambda x: x if not x[-1].isdigit() else x[:-1])
48
+ aggs = df_long.groupby(name_type).agg({'uids': 'nunique', 'completions': 'nunique'})
49
+ print(aggs)
50
+ for i,c in enumerate(st.columns(len(aggs))):
51
+ name = aggs.index[i].title()
52
+ uid_unique, comp_unique = aggs.iloc[i]
53
+ c.metric(label=f'{name} UIDs', value=uid_unique)
54
+ c.metric(label=f'{name} Completions', value=comp_unique)
55
+
56
  st.markdown('----')
57
 
58
 
59
+
60
  @st.cache_data
61
+ def uids(df_long, src, uids=None):
62
 
 
 
63
  nsfw_col = f'{src}_nsfw_scores'
 
64
 
65
+ if uids:
66
+ df_long = df_long.loc[df_long['uids'].isin(uids)]
67
 
68
+ col1, col2, col3, col4 = st.columns(4)
69
  col1.metric(
70
  label="Success %",
71
+ value=f'{df_long.loc[df_long["completions"].str.len() > 0].shape[0]/df_long.shape[0] * 100:.1f}',
72
+ help='Number of successful completions divided by total number of events'
73
  )
74
  col2.metric(
75
  label="Diversity %",
76
+ value=f'{df_long["completions"].nunique()/df_long.shape[0] * 100:.1f}',
77
+ help='Number of unique completions divided by total number of events'
78
  )
79
+ # uniqueness: average number of unique completions per uid divided by all unique completions
80
+ # (higher values indicate that the selected uids tend to share the same completions)
81
+
82
  col3.metric(
83
+ label="Uniqueness %",
84
+ value=f'{df_long.groupby("uids")["completions"].nunique().mean()/df_long["completions"].nunique() * 100:.1f}',
85
+ help='Average number of unique completions per uid divided by all unique completions'
86
+ )
87
+ col4.metric(
88
  label="Toxicity %",
89
+ value=f'{df_long[nsfw_col].mean() * 100:.1f}' if nsfw_col in df_long.columns else '--',
90
+ help='Average toxicity score of all events'
91
  )
92
  st.markdown('----')
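To make the Success/Diversity/Uniqueness definitions above concrete, here is a minimal sketch on an invented exploded event frame (column names mirror the ones used in uids):

import pandas as pd

df_long = pd.DataFrame({
    'uids':        [1, 1, 2, 2, 3],
    'completions': ['foo', 'bar', 'foo', '', 'baz'],
})

success = df_long.loc[df_long['completions'].str.len() > 0].shape[0] / df_long.shape[0] * 100
diversity = df_long['completions'].nunique() / df_long.shape[0] * 100
uniqueness = (df_long.groupby('uids')['completions'].nunique().mean()
              / df_long['completions'].nunique() * 100)
print(f'{success:.1f}% success, {diversity:.1f}% diversity, {uniqueness:.1f}% uniqueness')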
opendashboards/assets/plot.py CHANGED
@@ -8,6 +8,8 @@ def uid_diversty(df, rm_failed=True):
8
  plotting.plot_uid_diversty(
9
  df,
10
  remove_unsuccessful=rm_failed
 
 
11
  ),
12
  use_container_width=True
13
  )
@@ -22,6 +24,8 @@ def leaderboard(df, ntop, group_on, agg_col, agg, alias=False):
22
  agg_col=agg_col,
23
  agg=agg,
24
  alias=alias
 
 
25
  ),
26
  use_container_width=True
27
  )
@@ -49,4 +53,30 @@ def weights(df, uids, ntop=10):
49
  ntop=ntop
50
  ),
51
  use_container_width=True
 
 
52
  )
 
8
  plotting.plot_uid_diversty(
9
  df,
10
  remove_unsuccessful=rm_failed
11
+ ).update_layout(
12
+ coloraxis_showscale=False,
13
  ),
14
  use_container_width=True
15
  )
 
24
  agg_col=agg_col,
25
  agg=agg,
26
  alias=alias
27
+ ).update_layout(
28
+ coloraxis_showscale=False,
29
  ),
30
  use_container_width=True
31
  )
 
53
  ntop=ntop
54
  ),
55
  use_container_width=True
56
+ )
57
+
58
+ def completion_length_time(df, completion_col, uid_col, time_col, length_opt='characters'):
59
+ return st.plotly_chart(
60
+ plotting.plot_completion_length_time(
61
+ df,
62
+ uid_col=uid_col,
63
+ completion_col=completion_col,
64
+ time_col=time_col,
65
+ length_opt=length_opt
66
+ ),
67
+ use_container_width=True
68
+ )
69
+
70
+ def uid_completion_counts(df, uids, src, rm_empty, ntop=100, cumulative=False, normalize=True):
71
+ return st.plotly_chart(
72
+ plotting.plot_uid_completion_counts(
73
+ df,
74
+ uids=uids,
75
+ src=src,
76
+ rm_empty=rm_empty,
77
+ ntop=ntop,
78
+ cumulative=cumulative,
79
+ normalize=normalize
80
+ ),
81
+ use_container_width=True
82
  )
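A hedged sketch of how a dashboard tab might wire up the two new wrappers (the df_long and selected_uids variables, and the radio widget, are assumptions for illustration):

import streamlit as st
from opendashboards.assets import plot

length_opt = st.radio('Length measure:', ['characters', 'words', 'sentences'], horizontal=True)
plot.completion_length_time(
    df_long,
    completion_col='completions',
    uid_col='uids',
    time_col='completion_times',
    length_opt=length_opt,
)
plot.uid_completion_counts(df_long, uids=selected_uids, src='answer', rm_empty=True)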
opendashboards/utils/aggregate.py ADDED
@@ -0,0 +1,52 @@
 
 
1
+ import pandas as pd
2
+
3
+ def diversity(x):
4
+ return x.nunique()/len(x) if len(x)>0 else 0
5
+
6
+ def _nonempty(x):
7
+ return x[x.astype(str).str.len()>0]
8
+
9
+ def successful_diversity(x):
10
+ return diversity(_nonempty(x))
11
+
12
+ def success_rate(x):
13
+ return len(_nonempty(x))/len(x) if len(x)>0 else 0
14
+
15
+ def threshold_rate(x, threshold):
16
+ return (x>threshold).sum()/len(x)
17
+
18
+ def successful_nonzero_diversity(x):
19
+ # To be used with groupby.apply
20
+ return pd.Series({'completions_successful_nonzero_diversity': successful_diversity(x.loc[x['rewards']>0,'completions'])})
21
+
22
+ def completion_top_stats(x, exclude=None, ntop=1):
23
+ # To be used with groupby.apply
24
+ vc = x['completions'].value_counts()
25
+ if exclude is not None:
26
+ vc.drop(exclude, inplace=True, errors='ignore')
27
+
28
+ rewards = x.loc[x['completions'].isin(vc.index[:ntop])].groupby('completions').rewards.agg(['mean','std','max'])
29
+ return pd.DataFrame({
30
+ 'completions_top':rewards.index.tolist(),
31
+ 'completions_freq':vc.values[:ntop],
32
+ 'completions_reward_mean':rewards['mean'].values,
33
+ 'completions_reward_std':rewards['std'].values
34
+ })
35
+
36
+ def top(x, i=0, exclude=''):
37
+ return _nonempty(x).value_counts().drop(exclude, errors='ignore').index[i]
38
+
39
+ def freq(x, i=0, exclude=''):
40
+ return _nonempty(x).value_counts().drop(exclude, errors='ignore').values[i]
41
+
42
+ def nonzero_rate(x):
43
+ return (x>0).sum()/len(x)
44
+
45
+ def nonzero_mean(x):
46
+ return x[x>0].mean()
47
+
48
+ def nonzero_std(x):
49
+ return x[x>0].std()
50
+
51
+ def nonzero_median(x):
52
+ return x[x>0].median()
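A minimal sketch of how these helpers are meant to compose with pandas groupby (the frame below is invented; column names mirror the exploded event log):

import pandas as pd
from opendashboards.utils import aggregate

df = pd.DataFrame({
    'uids':        [1, 1, 2, 2],
    'completions': ['foo', '', 'foo', 'bar'],
    'rewards':     [0.9, 0.0, 0.4, 0.0],
})

# Scalar reducers plug straight into groupby.agg ...
stats = df.groupby('uids').agg({
    'completions': [aggregate.success_rate, aggregate.diversity],
    'rewards':     [aggregate.nonzero_rate, aggregate.nonzero_mean],
})

# ... while the Series/frame-returning helpers are used with groupby.apply.
nonzero_div = df.groupby('uids').apply(aggregate.successful_nonzero_diversity)
print(stats)
print(nonzero_div)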
opendashboards/utils/plotting.py CHANGED
@@ -44,7 +44,7 @@ def plot_throughput(df: pd.DataFrame, n_minutes: int = 10) -> go.Figure:
44
 
45
 
46
  def plot_weights(scores: pd.DataFrame, ntop: int = 20, uids: List[Union[str, int]] = None) -> go.Figure:
47
- """_summary_
48
 
49
  Args:
50
  scores (pd.DataFrame): Dataframe of scores. Should be indexed by timestamp and have one column per uid.
@@ -62,16 +62,16 @@ def plot_weights(scores: pd.DataFrame, ntop: int = 20, uids: List[Union[str, int
62
  ).update_traces(opacity=0.7)
63
 
64
 
65
- def plot_uid_diversty(df: pd.DataFrame, remove_unsuccessful: bool = False) -> go.Figure:
66
  """Plot uid diversity as measured by ratio of unique to total completions.
67
 
68
  Args:
69
  df (pd.DataFrame): Dataframe of event log.
70
  """
71
- uid_cols = ["followup_uids", "answer_uids"]
72
- completion_cols = ["followup_completions", "answer_completions"]
73
- reward_cols = ["followup_rewards", "answer_rewards"]
74
- list_cols = uid_cols + completion_cols + reward_cols
75
 
76
  df = df[list_cols].explode(column=list_cols)
77
  if remove_unsuccessful:
@@ -88,7 +88,7 @@ def plot_uid_diversty(df: pd.DataFrame, remove_unsuccessful: bool = False) -> go
88
  frames.append(frame)
89
 
90
  merged = pd.merge(*frames, left_index=True, right_index=True, suffixes=("_followup", "_answer"))
91
- merged["reward_mean"] = merged.filter(regex="rewards_mean").mean(axis=1)
92
 
93
  merged.index.name = "UID"
94
  merged.reset_index(inplace=True)
@@ -97,8 +97,8 @@ def plot_uid_diversty(df: pd.DataFrame, remove_unsuccessful: bool = False) -> go
97
  merged,
98
  x="diversity_followup",
99
  y="diversity_answer",
100
- opacity=0.3,
101
- size="followup_completions_size",
102
  color="reward_mean",
103
  hover_data=["UID"] + merged.columns.tolist(),
104
  marginal_x="histogram",
@@ -112,7 +112,7 @@ def plot_uid_diversty(df: pd.DataFrame, remove_unsuccessful: bool = False) -> go
112
 
113
  def plot_completion_rates(
114
  df: pd.DataFrame,
115
- msg_col: str = "all_completions",
116
  time_interval: str = "H",
117
  time_col: str = "_timestamp",
118
  ntop: int = 20,
@@ -123,7 +123,7 @@ def plot_completion_rates(
123
 
124
  Args:
125
  df (pd.DataFrame): Dataframe of event log.
126
- msg_col (str, optional): List-like column containing completions. Defaults to 'all_completions'.
127
  time_interval (str, optional): Pandas time interval. Defaults to 'H'. See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
128
  time_col (str, optional): Column containing timestamps as pd.Datetime. Defaults to '_timestamp'.
129
  ntop (int, optional): Number of completions to plot. Defaults to 20.
@@ -163,10 +163,10 @@ def plot_completion_rates(
163
 
164
  def plot_completion_rewards(
165
  df: pd.DataFrame,
166
- msg_col: str = "followup_completions",
167
- reward_col: str = "followup_rewards",
168
  time_col: str = "_timestamp",
169
- uid_col: str = "followup_uids",
170
  ntop: int = 3,
171
  completions: List[str] = None,
172
  completion_regex: str = None,
@@ -175,9 +175,10 @@ def plot_completion_rewards(
175
 
176
  Args:
177
  df (pd.DataFrame): Dataframe of event log.
178
- msg_col (str, optional): List-like column containing completions. Defaults to 'followup_completions'.
179
- reward_col (str, optional): List-like column containing rewards. Defaults to 'followup_rewards'.
180
  time_col (str, optional): Column containing timestamps as pd.Datetime. Defaults to '_timestamp'.
 
181
 ntop (int, optional): Number of completions to plot. Defaults to 3.
182
  completions (List[str], optional): List of completions to plot. Defaults to None.
183
  completion_regex (str, optional): Regex to match completions. Defaults to None.
@@ -198,7 +199,11 @@ def plot_completion_rewards(
198
  else:
199
  completions = completion_counts.index[:ntop]
200
  print(f"Using top {len(completions)} completions: \n{completions}")
201
-
 
 
202
  # Get ranks of completions in terms of number of occurrences
203
  ranks = completion_counts.rank(method="dense", ascending=False).loc[completions].astype(int)
204
 
@@ -219,14 +224,14 @@ def plot_completion_rewards(
219
  labels={"rank": "Rank", reward_col: "Reward", time_col: ""},
220
  title=f"Rewards for {len(completions)} Messages",
221
  **plotly_config,
222
- opacity=0.3,
223
  )
224
 
225
 
226
  def plot_leaderboard(
227
  df: pd.DataFrame,
228
- group_on: str = "answer_uids",
229
- agg_col: str = "answer_rewards",
230
  agg: str = "mean",
231
  ntop: int = 10,
232
  alias: bool = False,
@@ -235,44 +240,44 @@ def plot_leaderboard(
235
 
236
  Args:
237
  df (pd.DataFrame): Dataframe of event log.
238
- group_on (str, optional): Entities to use for grouping. Defaults to 'answer_uids'.
239
- agg_col (str, optional): Column to aggregate. Defaults to 'answer_rewards'.
240
  agg (str, optional): Aggregation function. Defaults to 'mean'.
241
  ntop (int, optional): Number of entities to plot. Defaults to 10.
242
  alias (bool, optional): Whether to use aliases for indices. Defaults to False.
243
  """
244
  df = df[[group_on, agg_col]].explode(column=[group_on, agg_col])
245
 
246
- rankings = df.groupby(group_on)[agg_col].agg(agg).sort_values(ascending=False).head(ntop)
247
  if alias:
248
  index = rankings.index.map({name: str(i) for i, name in enumerate(rankings.index)})
249
  else:
250
  index = rankings.index.astype(str)
251
 
252
- print(f"Using top {ntop} {group_on} by {agg_col}: \n{rankings}")
253
  return px.bar(
254
- x=rankings.astype(float),
255
  y=index,
256
  color=rankings,
257
  orientation="h",
258
  labels={"x": f"{agg_col.title()}", "y": group_on, "color": ""},
259
  title=f"Leaderboard for {agg_col}, top {ntop} {group_on}",
260
  color_continuous_scale="BlueRed",
261
- opacity=0.5,
262
  hover_data=[rankings.index.astype(str)],
263
  **plotly_config,
264
  )
265
 
266
 
 
267
  def plot_dendrite_rates(
268
- df: pd.DataFrame, uid_col: str = "answer_uids", reward_col: str = "answer_rewards", ntop: int = 20, uids: List[int] = None
269
  ) -> go.Figure:
270
  """Makes a bar chart of the success rate of dendrite calls for a given set of uids.
271
 
272
  Args:
273
  df (pd.DataFrame): Dataframe of event log.
274
- uid_col (str, optional): Column containing uids. Defaults to 'answer_uids'.
275
- reward_col (str, optional): Column containing rewards. Defaults to 'answer_rewards'.
276
  ntop (int, optional): Number of uids to plot. Defaults to 20.
277
  uids (List[int], optional): List of uids to plot. Defaults to None.
278
 
@@ -297,15 +302,91 @@ def plot_dendrite_rates(
297
  barmode="group",
298
  title="Dendrite Calls by UID",
299
  color_continuous_scale="Blues",
300
- opacity=0.5,
301
  **plotly_config,
302
  )
303
 
 
 
 
304
 
305
  def plot_network_embedding(
306
  df: pd.DataFrame,
307
- uid_col: str = "followup_uids",
308
- completion_col: str = "followup_completions",
309
  ntop: int = 1,
310
  uids: List[int] = None,
311
  ) -> go.Figure:
@@ -314,8 +395,8 @@ def plot_network_embedding(
314
  Args:
315
  df (pd.DataFrame): Dataframe of event log.
316
 
317
- uid_col (str, optional): Column containing uids. Defaults to 'answer_uids'.
318
- completion_col (str, optional): Column containing completions. Defaults to 'followup_completions'.
319
 ntop (int, optional): Number of uids to plot. Defaults to 1.
320
  hover_data (List[str], optional): Columns to include in hover data. Defaults to None.
321
  uids (List[int], optional): List of uids to plot. Defaults to None.
@@ -358,6 +439,6 @@ def plot_network_embedding(
358
  title=f"Graph for Top {ntop} Completion Similarities",
359
  color_continuous_scale="BlueRed",
360
  hover_data=["UID", "top_completions"],
361
- opacity=0.5,
362
  **plotly_config,
363
  )
 
44
 
45
 
46
  def plot_weights(scores: pd.DataFrame, ntop: int = 20, uids: List[Union[str, int]] = None) -> go.Figure:
47
+ """Plot weights of uids.
48
 
49
  Args:
50
  scores (pd.DataFrame): Dataframe of scores. Should be indexed by timestamp and have one column per uid.
 
62
  ).update_traces(opacity=0.7)
63
 
64
 
65
+ def plot_uid_diversty(df: pd.DataFrame, x: str = 'followup', y: str = 'answer', remove_unsuccessful: bool = False) -> go.Figure:
66
  """Plot uid diversity as measured by ratio of unique to total completions.
67
 
68
  Args:
69
  df (pd.DataFrame): Dataframe of event log.
70
  """
71
+ return px.scatter(x=[1,2,3], y=[1,2,3])  # FIXME: debug placeholder; the merge/explode logic below is currently unreachable
72
+ xrows = df.loc[df.name.str.contains(x)]
73
+ yrows = df.loc[df.name.str.contains(y)]
74
+ df = pd.merge(xrows, yrows, on='uid', suffixes=('_followup', '_answer'))
75
 
76
  df = df[list_cols].explode(column=list_cols)
77
  if remove_unsuccessful:
 
88
  frames.append(frame)
89
 
90
  merged = pd.merge(*frames, left_index=True, right_index=True, suffixes=("_followup", "_answer"))
91
+ merged["reward_mean"] = merged.filter(regex="rewards_mean").mean(axis=1).astype(float)
92
 
93
  merged.index.name = "UID"
94
  merged.reset_index(inplace=True)
 
97
  merged,
98
  x="diversity_followup",
99
  y="diversity_answer",
100
+ opacity=0.35,
101
+ # size="completions_size",
102
  color="reward_mean",
103
  hover_data=["UID"] + merged.columns.tolist(),
104
  marginal_x="histogram",
 
112
 
113
  def plot_completion_rates(
114
  df: pd.DataFrame,
115
+ msg_col: str = "completions",
116
  time_interval: str = "H",
117
  time_col: str = "_timestamp",
118
  ntop: int = 20,
 
123
 
124
  Args:
125
  df (pd.DataFrame): Dataframe of event log.
126
+ msg_col (str, optional): List-like column containing completions. Defaults to 'completions'.
127
  time_interval (str, optional): Pandas time interval. Defaults to 'H'. See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
128
  time_col (str, optional): Column containing timestamps as pd.Datetime. Defaults to '_timestamp'.
129
  ntop (int, optional): Number of completions to plot. Defaults to 20.
 
163
 
164
  def plot_completion_rewards(
165
  df: pd.DataFrame,
166
+ msg_col: str = "completions",
167
+ reward_col: str = "rewards",
168
  time_col: str = "_timestamp",
169
+ uid_col: str = "uids",
170
  ntop: int = 3,
171
  completions: List[str] = None,
172
  completion_regex: str = None,
 
175
 
176
  Args:
177
  df (pd.DataFrame): Dataframe of event log.
178
+ msg_col (str, optional): List-like column containing completions. Defaults to 'completions'.
179
+ reward_col (str, optional): List-like column containing rewards. Defaults to 'rewards'.
180
  time_col (str, optional): Column containing timestamps as pd.Datetime. Defaults to '_timestamp'.
181
+ uid_col (str, optional): Column containing UIDs. Defaults to 'uids'.
182
 ntop (int, optional): Number of completions to plot. Defaults to 3.
183
  completions (List[str], optional): List of completions to plot. Defaults to None.
184
  completion_regex (str, optional): Regex to match completions. Defaults to None.
 
199
  else:
200
  completions = completion_counts.index[:ntop]
201
  print(f"Using top {len(completions)} completions: \n{completions}")
202
+ else:
203
+ found_completions = [c for c in completions if c in completion_counts.index]
204
+ print(f"Using {len(found_completions)}/{len(completions)} completions: \n{found_completions}")
205
+ completions = found_completions
206
+
207
  # Get ranks of completions in terms of number of occurrences
208
  ranks = completion_counts.rank(method="dense", ascending=False).loc[completions].astype(int)
209
 
 
224
  labels={"rank": "Rank", reward_col: "Reward", time_col: ""},
225
  title=f"Rewards for {len(completions)} Messages",
226
  **plotly_config,
227
+ opacity=0.35,
228
  )
229
 
230
 
231
  def plot_leaderboard(
232
  df: pd.DataFrame,
233
+ group_on: str = "uids",
234
+ agg_col: str = "rewards",
235
  agg: str = "mean",
236
  ntop: int = 10,
237
  alias: bool = False,
 
240
 
241
  Args:
242
  df (pd.DataFrame): Dataframe of event log.
243
+ group_on (str, optional): Entities to use for grouping. Defaults to 'uids'.
244
+ agg_col (str, optional): Column to aggregate. Defaults to 'rewards'.
245
  agg (str, optional): Aggregation function. Defaults to 'mean'.
246
  ntop (int, optional): Number of entities to plot. Defaults to 10.
247
  alias (bool, optional): Whether to use aliases for indices. Defaults to False.
248
  """
249
  df = df[[group_on, agg_col]].explode(column=[group_on, agg_col])
250
 
251
+ rankings = df.groupby(group_on)[agg_col].agg(agg).sort_values(ascending=False).head(ntop).astype(float)
252
  if alias:
253
  index = rankings.index.map({name: str(i) for i, name in enumerate(rankings.index)})
254
  else:
255
  index = rankings.index.astype(str)
256
 
 
257
  return px.bar(
258
+ x=rankings,
259
  y=index,
260
  color=rankings,
261
  orientation="h",
262
  labels={"x": f"{agg_col.title()}", "y": group_on, "color": ""},
263
  title=f"Leaderboard for {agg_col}, top {ntop} {group_on}",
264
  color_continuous_scale="BlueRed",
265
+ opacity=0.35,
266
  hover_data=[rankings.index.astype(str)],
267
  **plotly_config,
268
  )
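With the flattened column names, a typical call would presumably look like the sketch below (df is the event log with list-valued 'uids' and 'rewards' columns, matching the new defaults):

fig = plot_leaderboard(df, group_on='uids', agg_col='rewards', agg='mean', ntop=10)
fig.show()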
269
 
270
 
271
+
272
  def plot_dendrite_rates(
273
+ df: pd.DataFrame, uid_col: str = "uids", reward_col: str = "rewards", ntop: int = 20, uids: List[int] = None
274
  ) -> go.Figure:
275
  """Makes a bar chart of the success rate of dendrite calls for a given set of uids.
276
 
277
  Args:
278
  df (pd.DataFrame): Dataframe of event log.
279
+ uid_col (str, optional): Column containing uids. Defaults to 'uids'.
280
+ reward_col (str, optional): Column containing rewards. Defaults to 'rewards'.
281
  ntop (int, optional): Number of uids to plot. Defaults to 20.
282
  uids (List[int], optional): List of uids to plot. Defaults to None.
283
 
 
302
  barmode="group",
303
  title="Dendrite Calls by UID",
304
  color_continuous_scale="Blues",
305
+ opacity=0.35,
306
  **plotly_config,
307
  )
308
 
309
+ def plot_completion_length_time(
310
+ df: pd.DataFrame,
311
+ uid_col: str = "uids",
312
+ completion_col: str = "completions",
313
+ time_col: str = "completion_times",
314
+ uids: List[int] = None,
315
+ length_opt: str = 'characters',
316
+ ) -> go.Figure:
317
+ """Scatter completion length against completion time for the selected uids."""
318
+
319
+ df = df[[uid_col, completion_col, time_col]].explode(column=[uid_col, completion_col, time_col])
320
+ df["time"] = df[time_col].astype(float)
321
+ if uids is not None:
322
+ df = df.loc[df[uid_col].isin(uids)]
323
+
324
+
325
+ if length_opt == 'characters':
326
+ df["completion_length"] = df[completion_col].str.len()
327
+ elif length_opt == 'words':
328
+ df["completion_length"] = df[completion_col].str.split().str.len()
329
+ elif length_opt == 'sentences':
330
+ df["completion_length"] = df[completion_col].str.split('.').str.len()
331
+ else:
332
+ raise ValueError(f"length_opt must be one of 'words', 'characters', or 'sentences', got {length_opt}")
333
+
334
+ return px.scatter(
335
+ df,
336
+ x='completion_length',
337
+ y='time',
338
+ color=uid_col if uids is not None else None,
339
+ labels={"completion_length": f"Completion Length, {length_opt.title()}", "time": "Time (s)"},
340
+ title=f"Completion Length vs Time, {length_opt.title()}",
341
+ marginal_x="histogram",
342
+ marginal_y="histogram",
343
+ hover_data=[uid_col, completion_col],
344
+ opacity=0.35,
345
+ **plotly_config,
346
+ )
347
+
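For reference, the three length_opt measures above reduce to simple string operations; a toy example (text invented):

import pandas as pd

s = pd.Series(["Hello world. How are you?"])
print(s.str.len()[0])                  # 25 characters
print(s.str.split().str.len()[0])      # 5 words
print(s.str.split('.').str.len()[0])   # 2 "sentences" (naive split on '.')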
348
+ def plot_uid_completion_counts(
349
+ df: pd.DataFrame,
350
+ uids: List[int],
351
+ src: str = 'answer',
352
+ rm_empty: bool = True,
353
+ ntop: int = 100,
354
+ cumulative: bool = False,
355
+ normalize: bool = True,
356
+ ) -> go.Figure:
357
+ """Plot per-uid completion frequencies against rank (optionally cumulative and normalized)."""
358
+ completion_col = 'completions'
359
+ uid_col = 'uids'
360
+ if rm_empty:
361
+ df = df.loc[df[completion_col].str.len()>0]
362
+
363
+ df = df.loc[df[uid_col].isin(uids)]
364
+
365
+ g = df.groupby(uid_col)[completion_col].value_counts(normalize=normalize).reset_index(level=1)
366
+ y_col = g.columns[-1]
367
+
368
+ # when cumulative, rescale each group to a max of 1 if normalize is True
369
+ if cumulative:
370
+ g[y_col] = g.groupby(level=0)[y_col].cumsum().transform(lambda x: x/x.max() if normalize else x)
371
+
372
+ # get top n completions
373
+ g = g.groupby(level=0).head(ntop)
374
+
375
+ # create a rank column which increments by one and resets when the uid changes
376
+ g['rank'] = g.groupby(level=0).cumcount()+1
377
+
378
+ return px.line(g.sort_index().reset_index(),
379
+ x='rank',y=y_col,color=uid_col,
380
+ labels={'rank':'Top Completions',uid_col:'UID',y_col:y_col.replace('_',' ').title()},
381
+ title=f'{src.title()} Completion {y_col.replace("_"," ").title()}s by Rank',
382
+ **plotly_config,
383
+ ).update_traces(opacity=0.7)
384
+
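A toy trace of the value_counts / cumulative-rescale steps above (data invented; on recent pandas the proportion column is picked up via g.columns[-1]):

import pandas as pd

df = pd.DataFrame({
    'uids':        [1, 1, 1, 2, 2],
    'completions': ['a', 'a', 'b', 'c', 'c'],
})
g = df.groupby('uids')['completions'].value_counts(normalize=True).reset_index(level=1)
y_col = g.columns[-1]
g[y_col] = g.groupby(level=0)[y_col].cumsum().transform(lambda x: x / x.max())
g['rank'] = g.groupby(level=0).cumcount() + 1
print(g)
# uid 1: 'a' covers ~67% of its events and the curve reaches 100% once 'b' is included;
# uid 2 reaches 100% immediately with its single completion 'c'.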
385
 
386
  def plot_network_embedding(
387
  df: pd.DataFrame,
388
+ uid_col: str = "uids",
389
+ completion_col: str = "completions",
390
  ntop: int = 1,
391
  uids: List[int] = None,
392
  ) -> go.Figure:
 
395
  Args:
396
  df (pd.DataFrame): Dataframe of event log.
397
 
398
+ uid_col (str, optional): Column containing uids. Defaults to 'uids'.
399
+ completion_col (str, optional): Column containing completions. Defaults to 'completions'.
400
 ntop (int, optional): Number of uids to plot. Defaults to 1.
401
  hover_data (List[str], optional): Columns to include in hover data. Defaults to None.
402
  uids (List[int], optional): List of uids to plot. Defaults to None.
 
439
  title=f"Graph for Top {ntop} Completion Similarities",
440
  color_continuous_scale="BlueRed",
441
  hover_data=["UID", "top_completions"],
442
+ opacity=0.35,
443
  **plotly_config,
444
  )