steffenc committed
Commit e60235b · 1 Parent(s): 571cd62

Refactor package structure, some redesign and improved layout

opendashboards/__init__.py CHANGED
@@ -0,0 +1,3 @@
+
+
+ __version__ = "0.1.0"
opendashboards/assets/inspect.py ADDED
@@ -0,0 +1,55 @@
+
+ import streamlit as st
+ import pandas as pd
+ import opendashboards.utils.utils as utils
+
+
+ @st.cache_data
+ def explode_data(df):
+     list_cols = utils.get_list_col_lengths(df)
+     try:
+         return utils.explode_data(df, list(list_cols.keys())).apply(pd.to_numeric, errors='ignore')
+     except Exception as e:
+         st.error('Error exploding data with the following columns:')
+         st.write(list_cols)
+         st.exception(e)
+         st.dataframe(df)
+         st.stop()
+
+ @st.cache_data
+ def completions(df_long, col):
+     return df_long[col].value_counts()
+
+ @st.cache_data
+ def weights(df, index='_timestamp'):
+     # Create a column for each UID and show most recent rows
+     scores = df['moving_averaged_scores'].apply(pd.Series).fillna(method='ffill')
+     if index in df.columns:
+         scores.index = df[index]
+
+     # rename columns
+     scores.rename({i: f'UID-{i}' for i in range(scores.shape[1])}, axis=1, inplace=True)
+     return scores
+
+ def run_event_data(df_runs, df, selected_runs):
+
+     st.markdown('#')
+
+     show_col1, show_col2 = st.columns(2)
+     show_runs = show_col1.checkbox('Show runs', value=True)
+     show_events = show_col2.checkbox('Show events', value=False)
+     if show_runs:
+         st.markdown(f'Wandb info for **{len(selected_runs)} selected runs**:')
+         st.dataframe(df_runs.loc[df_runs.id.isin(selected_runs)],
+                      column_config={
+                          "url": st.column_config.LinkColumn("URL"),
+                      }
+                      )
+
+     if show_events:
+         st.markdown(f'Raw events for **{len(selected_runs)} selected runs**:')
+         st.dataframe(df.head(50),
+                      column_config={
+                          "url": st.column_config.LinkColumn("URL"),
+                      }
+                      )
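`explode_data` delegates to `utils.explode_data`, which flattens list-valued event columns into one row per element before per-UID metrics are computed. A minimal sketch of the underlying step in plain pandas, with illustrative column names and values (multi-column explode needs pandas >= 1.3):

import pandas as pd

df = pd.DataFrame({
    "step": [0, 1],
    "followup_uids": [[1, 2], [3]],
    "followup_rewards": [[0.5, 0.9], [0.1]],
})

# Paired list columns explode together: one output row per list element
df_long = df.explode(["followup_uids", "followup_rewards"], ignore_index=True)
print(df_long)  # 3 rows: (0, 1, 0.5), (0, 2, 0.9), (1, 3, 0.1)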
opendashboards/assets/io.py ADDED
@@ -0,0 +1,95 @@
+ import os
+ import re
+ import pandas as pd
+ import streamlit as st
+
+ import opendashboards.utils.utils as utils
+
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+ @st.cache_data
+ def load_runs(project, filters, min_steps=10):
+     runs = []
+     msg = st.empty()
+     for run in utils.get_runs(project, filters):
+         step = run.summary.get('_step', 0)
+         if step < min_steps:
+             msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
+             continue
+
+         duration = run.summary.get('_runtime')
+         end_time = run.summary.get('_timestamp')
+         # extract values for selected tags
+         rules = {'hotkey': re.compile(r'^[0-9a-z]{48}$', re.IGNORECASE), 'version': re.compile(r'^\d\.\d+\.\d+$'), 'spec_version': re.compile(r'\d{4}$')}
+         tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
+         # include bool flag for remaining tags
+         tags.update({k: k in run.tags for k in ('mock','custom_gating_model','nsfw_filter','outsource_scoring','disable_set_weights')})
+
+         runs.append({
+             'state': run.state,
+             'num_steps': step,
+             'entity': run.entity,
+             'id': run.id,
+             'name': run.name,
+             'project': run.project,
+             'url': run.url,
+             'path': os.path.join(run.entity, run.project, run.id),
+             'start_time': pd.to_datetime(end_time-duration, unit="s"),
+             'end_time': pd.to_datetime(end_time, unit="s"),
+             'duration': pd.to_datetime(duration, unit="s"),
+             **tags
+         })
+     msg.empty()
+     return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})
+
+
+ @st.cache_data
+ def load_data(selected_runs, load=True, save=False):
+
+     frames = []
+     n_events = 0
+     progress = st.progress(0, 'Loading data')
+     info = st.empty()
+     for i, idx in enumerate(selected_runs.index):
+         run = selected_runs.loc[idx]
+         prog_msg = f'Loading data {i/len(selected_runs)*100:.0f}% ({i}/{len(selected_runs)} runs, {n_events} events)'
+
+         rel_path = os.path.join('data', f'history-{run.id}.csv')
+         file_path = os.path.join(BASE_DIR, rel_path)
+
+         if load and os.path.exists(file_path):
+             progress.progress(i/len(selected_runs), f'{prog_msg}... **reading** `{rel_path}`')
+             try:
+                 df = utils.load_data(file_path)
+             except Exception as e:
+                 info.warning(f'Failed to load history from `{file_path}`')
+                 st.exception(e)
+                 continue
+         else:
+             progress.progress(i/len(selected_runs), f'{prog_msg}... **downloading** `{run.path}`')
+             try:
+                 # Download the history from wandb and attach the run metadata
+                 df = utils.download_data(run.path)
+                 df = df.assign(**run.to_dict())
+                 # ensure the data directory (not the file path) exists
+                 os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+                 if save and run.state != 'running':
+                     df.to_csv(file_path, index=False)
+                     # st.info(f'Saved history to {file_path}')
+             except Exception as e:
+                 info.warning(f'Failed to download history for `{run.path}`')
+                 st.exception(e)
+                 continue
+
+         frames.append(df)
+         n_events += df.shape[0]
+
+     progress.empty()
+     if not frames:
+         info.error('No data loaded')
+         st.stop()
+     # TODO: remove rows which contain chain weights as they mess up the schema
+     return pd.concat(frames)
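The tag rules in `load_runs` classify free-form wandb tags purely by shape: 48-character hotkeys, dotted release versions, and 4-digit spec versions. A self-contained illustration with made-up tags:

import re

rules = {
    'hotkey': re.compile(r'^[0-9a-z]{48}$', re.IGNORECASE),
    'version': re.compile(r'^\d\.\d+\.\d+$'),
    'spec_version': re.compile(r'\d{4}$'),
}
run_tags = ['1.0.2', '1022', 'mock', 'a' * 48]  # hypothetical run.tags

# For each key, the last matching tag wins (the dict comprehension overwrites)
tags = {k: tag for k, rule in rules.items() for tag in run_tags if rule.match(tag)}
print(tags)  # {'hotkey': 'aaaa...a', 'version': '1.0.2', 'spec_version': '1022'}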
opendashboards/assets/metric.py ADDED
@@ -0,0 +1,60 @@
+ import os
+ import re
+ import time
+ import pandas as pd
+ import streamlit as st
+
+
+ @st.cache_data
+ def wandb(df_runs):
+
+     # get rows where start time is older than 24h ago
+     df_runs_old = df_runs.loc[df_runs.start_time < pd.to_datetime(time.time()-24*60*60, unit='s')]
+
+     col1, col2, col3 = st.columns(3)
+
+     col1.metric('Runs', df_runs.shape[0], delta=f'{df_runs.shape[0]-df_runs_old.shape[0]} (24h)')
+     col2.metric('Hotkeys', df_runs.hotkey.nunique(), delta=f'{df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique()} (24h)')
+     col3.metric('Events', df_runs.num_steps.sum(), delta=f'{df_runs.num_steps.sum()-df_runs_old.num_steps.sum()} (24h)')
+     st.markdown('----')
+
+
+ @st.cache_data
+ def runs(df, df_long, selected_runs):
+
+     col1, col2, col3 = st.columns(3)
+     col1.metric(label="Runs", value=len(selected_runs))
+     col1.metric(label="Events", value=df.shape[0])
+     col2.metric(label="Followup UIDs", value=df_long.followup_uids.nunique())
+     col2.metric(label="Answer UIDs", value=df_long.answer_uids.nunique())
+     col3.metric(label="Followup Completions", value=df_long.followup_completions.nunique())
+     col3.metric(label="Answer Completions", value=df_long.answer_completions.nunique())
+     st.markdown('----')
+
+
+ @st.cache_data
+ def uids(df_long, src, uid=None):
+
+     uid_col = f'{src}_uids'
+     completion_col = f'{src}_completions'
+     nsfw_col = f'{src}_nsfw_scores'
+     reward_col = f'{src}_rewards'
+
+     if uid is not None:
+         df_long = df_long.loc[df_long[uid_col] == uid]
+
+     col1, col2, col3 = st.columns(3)
+     col1.metric(
+         label="Success %",
+         value=f'{df_long.loc[df_long[completion_col].str.len() > 0].shape[0]/df_long.shape[0] * 100:.1f}'
+     )
+     col2.metric(
+         label="Diversity %",
+         value=f'{df_long[completion_col].nunique()/df_long.shape[0] * 100:.1f}'
+     )
+     col3.metric(
+         label="Toxicity %",
+         value=f'{df_long[nsfw_col].mean() * 100:.1f}' if nsfw_col in df_long.columns else 'N/A'
+     )
+     st.markdown('----')
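The 24h deltas in `wandb()` compare all runs against the subset that started more than a day ago. The cutoff is built from epoch seconds; roughly:

import time
import pandas as pd

# timestamp for "24 hours ago", matching the comparison in wandb()
cutoff = pd.to_datetime(time.time() - 24 * 60 * 60, unit='s')
# e.g. df_runs_old = df_runs.loc[df_runs.start_time < cutoff]
print(cutoff)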
opendashboards/assets/plot.py ADDED
@@ -0,0 +1,52 @@
+
+ import streamlit as st
+ import opendashboards.utils.plotting as plotting
+
+ # @st.cache_data
+ def uid_diversty(df, rm_failed=True):
+     return st.plotly_chart(
+         plotting.plot_uid_diversty(
+             df,
+             remove_unsuccessful=rm_failed
+         ),
+         use_container_width=True
+     )
+
+ # @st.cache_data
+ def leaderboard(df, ntop, group_on, agg_col, agg, alias=False):
+     return st.plotly_chart(
+         plotting.plot_leaderboard(
+             df,
+             ntop=ntop,
+             group_on=group_on,
+             agg_col=agg_col,
+             agg=agg,
+             alias=alias
+         ),
+         use_container_width=True
+     )
+
+ # @st.cache_data
+ def completion_rewards(df, completion_col, reward_col, uid_col, ntop, completions=None, completion_regex=None):
+     return st.plotly_chart(
+         plotting.plot_completion_rewards(
+             df,
+             msg_col=completion_col,
+             reward_col=reward_col,
+             uid_col=uid_col,
+             ntop=ntop,
+             completions=completions,
+             completion_regex=completion_regex
+         ),
+         use_container_width=True
+     )
+
+ def weights(df, uids, ntop=10):
+     return st.plotly_chart(
+         plotting.plot_weights(
+             df,
+             uids=[f'UID-{i}' for i in uids],
+             ntop=ntop
+         ),
+         use_container_width=True
+     )
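`weights` maps integer UIDs onto the `UID-{i}` column names produced by `inspect.weights`, so the two naming schemes must stay in sync. A minimal sketch of that expansion step (the score values are invented):

import pandas as pd

df = pd.DataFrame({'moving_averaged_scores': [[0.1, 0.2], [0.3, 0.4]]})

# Expand the list column into one column per UID, as inspect.weights does
scores = df['moving_averaged_scores'].apply(pd.Series)
scores.rename({i: f'UID-{i}' for i in range(scores.shape[1])}, axis=1, inplace=True)
print(scores.columns.tolist())  # ['UID-0', 'UID-1']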
opendashboards/dashboard.py CHANGED
@@ -1,148 +1,29 @@
  import streamlit as st
- import pandas as pd
- import os
- import re
- import time
- from .utils import get_runs, download_data, get_list_col_lengths, explode_data
- import .plotting as plotting
+ from opendashboards.assets import io, inspect, metric, plot
 
  # dendrite time versus completion length
  # prompt-based completion score stats
+ # introspect specific RUN-UID-COMPLETION
 
  DEFAULT_PROJECT = "openvalidators"
  DEFAULT_FILTERS = {"tags": {"$in": ["1.0.0", "1.0.1", "1.0.2", "1.0.3", "1.0.4"]}}
-
- @st.cache_data
- def load_runs(project=DEFAULT_PROJECT, filters=DEFAULT_FILTERS, min_steps=10):
-     runs = []
-     msg = st.empty()
-     for run in get_runs(project, filters):
-         step = run.summary.get('_step',0)
-         if step < min_steps:
-             msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
-             continue
-
-         duration = run.summary.get('_runtime')
-         end_time = run.summary.get('_timestamp')
-         # extract values for selected tags
-         rules = {'hotkey': re.compile('^[0-9a-z]{48}$',re.IGNORECASE), 'version': re.compile('^\\d\.\\d+\.\\d+$'), 'spec_version': re.compile('\\d{4}$')}
-         # tags = {k: tag for k,tag in zip(('hotkey','version','spec_version'),run.tags)}
-         tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
-         # include bool flag for remaining tags
-         tags.update({k: k in run.tags for k in ('mock','custom_gating_model','nsfw_filter','outsource_scoring','disable_set_weights')})
-
-         runs.append({
-             'state': run.state,
-             'num_steps': step,
-             'entity': run.entity,
-             'id': run.id,
-             'name': run.name,
-             'project': run.project,
-             'url': run.url,
-             'path': os.path.join(run.entity, run.project, run.id),
-             'start_time': pd.to_datetime(end_time-duration, unit="s"),
-             'end_time': pd.to_datetime(end_time, unit="s"),
-             'duration': pd.to_datetime(duration, unit="s"),
-             # 'tags': run.tags,
-             **tags
-         })
-     msg.empty()
-     return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})
-
-
- @st.cache_data
- def load_data(selected_runs, load=True, save=False):
-
-     frames = []
-     n_events = 0
-     progress = st.progress(0, 'Loading data')
-     for i, idx in enumerate(selected_runs.index):
-         run = selected_runs.loc[idx]
-         prog_msg = f'Loading data {i/len(selected_runs)*100:.0f}% ({i}/{len(selected_runs)} runs, {n_events} events)'
-
-         file_path = f'data/history-{run.id}.csv'
-
-         if load and os.path.exists(file_path):
-             progress.progress(i/len(selected_runs),f'{prog_msg}... reading {file_path}')
-             df = pd.read_csv(file_path)
-             # filter out events with missing step length
-             df = df.loc[df.step_length.notna()]
-
-             # detect list columns which are stored as strings
-             list_cols = [c for c in df.columns if df[c].dtype == "object" and df[c].str.startswith("[").all()]
-             # convert string representation of list to list
-             df[list_cols] = df[list_cols].applymap(eval, na_action='ignore')
-
-         else:
-             try:
-                 # Download the history from wandb
-                 progress.progress(i/len(selected_runs),f'{prog_msg}... downloading `{run.path}`')
-                 df = download_data(run.path)
-                 df.assign(**run.to_dict())
-                 if not os.path.exists('data/'):
-                     os.makedirs(file_path)
-
-                 if save and run.state != 'running':
-                     df.to_csv(file_path, index=False)
-                     # st.info(f'Saved history to {file_path}')
-             except Exception as e:
-                 st.error(f'Failed to download history for `{run.path}`')
-                 st.exception(e)
-                 continue
-
-         frames.append(df)
-         n_events += df.shape[0]
-
-     progress.empty()
-     # Remove rows which contain chain weights as it messes up schema
-     return pd.concat(frames)
-
- @st.cache_data
- def get_exploded_data(df):
-     list_cols = get_list_col_lengths(df)
-     return explode_data(df, list(list_cols))
-
- @st.cache_data
- def get_completions(df_long, col):
-     return df_long[col].value_counts()
-
- @st.cache_data
- def plot_uid_diversty(df, remove_unsuccessful=True):
-     return plotting.plot_uid_diversty(df, remove_unsuccessful=remove_unsuccessful)
-
- @st.cache_data
- def plot_leaderboard(df, ntop, group_on, agg_col, agg, alias=False):
-     return plotting.plot_leaderboard(df, ntop=ntop, group_on=group_on, agg_col=agg_col, agg=agg, alias=alias)
-
- @st.cache_data
- def plot_completion_rewards(df, completion_col, reward_col, uid_col, ntop, completions=None, completion_regex=None):
-     return plotting.plot_completion_rewards(df, msg_col=completion_col, reward_col=reward_col, uid_col=uid_col, ntop=ntop, completions=completions, completion_regex=completion_regex)
-
- @st.cache_data
- def uid_metrics(df_long, src, uid=None):
-
-     uid_col = f'{src}_uids'
-     completion_col = f'{src}_completions'
-     nsfw_col = f'{src}_nsfw_scores'
-     reward_col = f'{src}_rewards'
-
-     if uid is not None:
-         df_long = df_long.loc[df_long[uid_col] == uid]
-
-     col1, col2, col3 = st.columns(3)
-     col1.metric(
-         label="Success %",
-         value=f'{df_long.loc[df_long[completion_col].str.len() > 0].shape[0]/df_long.shape[0] * 100:.1f}'
-     )
-     col2.metric(
-         label="Diversity %",
-         value=f'{df_long[completion_col].nunique()/df_long.shape[0] * 100:.1f}'
-     )
-     col3.metric(
-         label="Toxicity %",
-         value=f'{df_long[nsfw_col].mean() * 100:.1f}' if nsfw_col in df_long.columns else 'N/A'
+ DEFAULT_SELECTED_RUNS = ['kt9bzxii']
+ DEFAULT_SRC = 'followup'
+ DEFAULT_COMPLETION_NTOP = 10
+ DEFAULT_UID_NTOP = 10
+
+ # Set app config
+ st.set_page_config(
+     page_title='Validator Dashboard',
+     menu_items={
+         'Report a bug': "https://github.com/opentensor/dashboards/issues",
+         'About': """
+         This dashboard is part of the OpenTensor project. \n
+         To see runs in wandb, go to: \n
+         https://wandb.ai/opentensor-dev/openvalidators/table?workspace=default
+         """
+     },
+     layout = "centered"
  )
 
  st.title('Validator :red[Analysis] Dashboard :eyes:')
@@ -150,155 +31,132 @@ st.title('Validator :red[Analysis] Dashboard :eyes:')
  st.markdown('#')
  st.markdown('#')
 
-
- with st.sidebar:
-     st.sidebar.header('Pages')
+ # with st.sidebar:
+ #     st.sidebar.header('Pages')
 
  with st.spinner(text=f'Checking wandb...'):
-     df_runs = load_runs()
-     # get rows where start time is older than 24h ago
-     df_runs_old = df_runs.loc[df_runs.start_time < pd.to_datetime(time.time()-24*60*60, unit='s')]
-
-     col1, col2, col3 = st.columns(3)
-
-     col1.metric('Runs', df_runs.shape[0], delta=f'{df_runs.shape[0]-df_runs_old.shape[0]} (24h)')
-     col2.metric('Hotkeys', df_runs.hotkey.nunique(), delta=f'{df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique()} (24h)')
-     col3.metric('Events', df_runs.num_steps.sum(), delta=f'{df_runs.num_steps.sum()-df_runs_old.num_steps.sum()} (24h)')
-
- # https://wandb.ai/opentensor-dev/openvalidators/runs/kt9bzxii/overview?workspace=
- # all_run_paths = ['opentensor-dev/openvalidators/kt9bzxii'] # pedro long run
-
- run_ids = df_runs.id
- default_selected_runs = ['kt9bzxii']
- selected_runs = default_selected_runs
+     df_runs = io.load_runs(project=DEFAULT_PROJECT, filters=DEFAULT_FILTERS, min_steps=10)
+
+ metric.wandb(df_runs)
 
  # add vertical space
  st.markdown('#')
  st.markdown('#')
 
  tab1, tab2, tab3, tab4 = st.tabs(["Wandb Runs", "UID Health", "Completions", "Prompt-based scoring"])
 
- # src = st.radio('Choose data source:', ['followup', 'answer'], horizontal=True, key='src')
- # list_list_cols = get_list_col_lengths(df_long)
- # df_long_long = explode_data(df_long, list(list_list_cols))
+ ### Wandb Runs ###
  with tab1:
 
      st.markdown('#')
-     st.subheader(":violet[Wandb] Runs")
-
-     # Load data
-     df = load_data(df_runs.loc[run_ids.isin(selected_runs)], load=True, save=True)
-     df_long = get_exploded_data(df)
-
-     col1, col2, col3, col4 = st.columns(4)
-     col1.metric(label="Selected runs", value=len(selected_runs))
-     col2.metric(label="Events", value=df.shape[0])
-     col3.metric(label="UIDs", value=df_long.followup_uids.nunique())
-     col4.metric(label="Unique completions", value=df_long.followup_uids.nunique())
-
-     selected_runs = st.multiselect(f'Runs ({len(df_runs)})', run_ids, default=selected_runs)
-
-     st.markdown('#')
-     st.subheader("View :violet[Data]")
-
-     show_col1, show_col2 = st.columns(2)
-     show_runs = show_col1.checkbox('Show runs', value=True)
-     show_events = show_col2.checkbox('Show events', value=False)
-     if show_runs:
-         st.markdown(f'Wandb info for **{len(selected_runs)} selected runs**:')
-         st.dataframe(df_runs.loc[run_ids.isin(selected_runs)],
-                      column_config={
-                          "url": st.column_config.LinkColumn("URL"),
-                      }
-                      )
-
-     if show_events:
-         st.markdown(f'Raw events for **{len(selected_runs)} selected runs**:')
-         st.dataframe(df.head(50),
-                      column_config={
-                          "url": st.column_config.LinkColumn("URL"),
-                      }
-                      )
-
- default_src = 'followup'
+     st.header(":violet[Wandb] Runs")
+
+     run_msg = st.info("Select a single run or compare multiple runs")
+     selected_runs = st.multiselect(f'Runs ({len(df_runs)})', df_runs.id, default=DEFAULT_SELECTED_RUNS, key='runs')
+
+     # Load data if new runs selected
+     if not selected_runs:
+         # open a dialog to select runs
+         run_msg.error("Please select at least one run")
+         st.snow()
+         st.stop()
+
+     df = io.load_data(df_runs.loc[df_runs.id.isin(selected_runs)], load=True, save=True)
+     df_long = inspect.explode_data(df)
+     df_weights = inspect.weights(df)
+
+     metric.runs(df, df_long, selected_runs)
+
+     with st.expander(f'Show :violet[raw] data for {len(selected_runs)} selected runs'):
+         inspect.run_event_data(df_runs, df, selected_runs)
+
+ ### UID Health ###
  with tab2:
 
      st.markdown('#')
-     st.subheader("UID :violet[Health]")
-     uid_src = default_src
-
-     # uid = st.selectbox('UID:', sorted(df_long[uid_col].unique()), key='uid')
-
-     uid_metrics(df_long, uid_src)
+     st.header("UID :violet[Health]")
+     st.info(f"Showing UID health metrics for **{len(selected_runs)} selected runs**")
+
      uid_src = st.radio('Select one:', ['followup', 'answer'], horizontal=True, key='uid_src')
-     uid_col = f'{uid_src}_uids'
-     reward_col = f'{uid_src}_rewards'
-
-     st.markdown('#')
-     st.subheader("UID :violet[Leaderboard]")
-     uid_ntop_default = 10
-
-     uid_col1, uid_col2 = st.columns(2)
-     uid_ntop = uid_col1.slider('Number of UIDs:', min_value=1, max_value=50, value=uid_ntop_default, key='uid_ntop')
-     uid_agg = uid_col2.selectbox('Aggregation:', ('mean','min','max','size','nunique'), key='uid_agg')
-
-     st.plotly_chart(
-         plot_leaderboard(
-             df,
-             ntop=uid_ntop,
-             group_on=uid_col,
-             agg_col=reward_col,
-             agg=uid_agg
-         )
-     )
-     remove_unsuccessful = st.checkbox('Remove failed completions', value=True)
-     st.plotly_chart(
-         plot_uid_diversty(
-             df,
-             remove_unsuccessful=remove_unsuccessful
-         )
-     )
+
+     metric.uids(df_long, uid_src)
+
+     with st.expander(f'Show UID **{uid_src}** weights data for **{len(selected_runs)} selected runs**'):
+
+         uids = st.multiselect('UID:', sorted(df_long[f'{uid_src}_uids'].unique()), key='uid')
+         st.markdown('#')
+         st.subheader(f"UID {uid_src.title()} :violet[Weights]")
+
+         plot.weights(
+             df_weights,
+             uids=uids,
+         )
+
+     with st.expander(f'Show UID **{uid_src}** leaderboard data for **{len(selected_runs)} selected runs**'):
+
+         st.markdown('#')
+         st.subheader(f"UID {uid_src.title()} :violet[Leaderboard]")
+         uid_col1, uid_col2 = st.columns(2)
+         uid_ntop = uid_col1.slider('Number of UIDs:', min_value=1, max_value=50, value=DEFAULT_UID_NTOP, key='uid_ntop')
+         uid_agg = uid_col2.selectbox('Aggregation:', ('mean','min','max','size','nunique'), key='uid_agg')
+
+         plot.leaderboard(
+             df,
+             ntop=uid_ntop,
+             group_on=f'{uid_src}_uids',
+             agg_col=f'{uid_src}_rewards',
+             agg=uid_agg
+         )
+
+     with st.expander(f'Show UID **{uid_src}** diversity data for **{len(selected_runs)} selected runs**'):
+
+         st.markdown('#')
+         st.subheader(f"UID {uid_src.title()} :violet[Diversity]")
+         rm_failed = st.checkbox(f'Remove failed **{uid_src}** completions', value=True)
+         plot.uid_diversty(df, rm_failed)
 
- completion_ntop_default = 10
+ ### Completions ###
  with tab3:
 
      st.markdown('#')
      st.subheader('Completion :violet[Leaderboard]')
-     completion_src = default_src
+     completion_info = st.empty()
 
      msg_col1, msg_col2 = st.columns(2)
      completion_src = msg_col1.radio('Select one:', ['followup', 'answer'], horizontal=True, key='completion_src')
-     completion_ntop = msg_col2.slider('Top k:', min_value=1, max_value=50, value=completion_ntop_default, key='completion_ntop')
+     completion_info.info(f"Showing **{completion_src}** completions for **{len(selected_runs)} selected runs**")
+
+     completion_ntop = msg_col2.slider('Top k:', min_value=1, max_value=50, value=DEFAULT_COMPLETION_NTOP, key='completion_ntop')
 
      completion_col = f'{completion_src}_completions'
      reward_col = f'{completion_src}_rewards'
      uid_col = f'{completion_src}_uids'
 
-     completions = get_completions(df_long, completion_col)
+     completions = inspect.completions(df_long, completion_col)
 
-     # completion_sel = st.radio('Select input method:', ['ntop', 'select','regex'], horizontal=True, key='completion_sel')
      # Get completions with highest average rewards
-     st.plotly_chart(
-         plot_leaderboard(
-             df,
-             ntop=completion_ntop,
-             group_on=completion_col,
-             agg_col=reward_col,
-             agg='mean',
-             alias=True
-         )
-     )
-     st.markdown('#')
-     st.subheader('Completion :violet[Rewards]')
-
-     completion_select = st.multiselect('Completions:', completions.index, default=completions.index[:3].tolist())
-     # completion_regex = st.text_input('Completion regex:', value='', key='completion_regex')
-
-     st.plotly_chart(
-         plot_completion_rewards(
+     plot.leaderboard(
+         df,
+         ntop=completion_ntop,
+         group_on=completion_col,
+         agg_col=reward_col,
+         agg='mean',
+         alias=True
+     )
+
+     with st.expander(f'Show **{completion_src}** completion rewards data for **{len(selected_runs)} selected runs**'):
+
+         st.markdown('#')
+         st.subheader('Completion :violet[Rewards]')
+
+         completion_select = st.multiselect('Completions:', completions.index, default=completions.index[:3].tolist())
+         # completion_regex = st.text_input('Completion regex:', value='', key='completion_regex')
+
+         plot.completion_rewards(
              df,
              completion_col=completion_col,
             reward_col=reward_col,
@@ -306,12 +164,12 @@ with tab3:
              ntop=completion_ntop,
              completions=completion_select,
          )
-     )
 
- with tab4:
-     st.subheader(':pink[Prompt-based scoring]')
-     prompt_src = st.radio('Select one:', ['followup', 'answer'], key='prompt')
+ ### Prompt-based scoring ###
+ with tab4:
+     # coming soon
+     st.info('Prompt-based scoring coming soon')
 
  # st.dataframe(df_long_long.filter(regex=prompt_src).head())
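Streamlit reruns the whole script on every interaction, so the empty-selection guard in tab1 is what keeps the later tabs from executing with no data. The pattern in isolation, as a hypothetical standalone app:

import streamlit as st

selected = st.multiselect('Runs', ['a', 'b', 'c'])
if not selected:
    st.error('Please select at least one run')
    st.stop()  # nothing below this line runs until something is selected

st.write(f'{len(selected)} runs selected')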
opendashboards/hello_world.py DELETED
@@ -1,3 +0,0 @@
- import streamlit as st
- from utils.utils import test
- st.write('HELLO BOITCHES')
opendashboards/utils/plotting.py CHANGED
@@ -53,7 +53,7 @@ def plot_weights(scores: pd.DataFrame, ntop: int = 20, uids: List[Union[str, int
      """
 
      # Select subset of columns for plotting
-     if uids is None:
+     if not uids:
          uids = scores.columns[:ntop]
          print(f"Using first {ntop} uids for plotting: {uids}")
 
@@ -249,6 +249,7 @@ def plot_leaderboard(
      else:
          index = rankings.index.astype(str)
 
+     print(f"Using top {ntop} {group_on} by {agg_col}: \n{rankings}")
      return px.bar(
          x=rankings,
          y=index,
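Changing `if uids is None` to `if not uids` extends the fallback to cover an empty selection as well as a missing argument, which matters now that dashboard.py feeds `plot_weights` from an `st.multiselect` that starts out empty. In isolation:

def pick_uids(uids, columns, ntop=3):
    # `not uids` is True for both None and [] (an empty multiselect)
    if not uids:
        uids = columns[:ntop]
    return uids

cols = ['UID-0', 'UID-1', 'UID-2', 'UID-3']
print(pick_uids(None, cols))  # ['UID-0', 'UID-1', 'UID-2']
print(pick_uids([], cols))    # ['UID-0', 'UID-1', 'UID-2']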
opendashboards/utils/utils.py CHANGED
@@ -24,10 +24,6 @@ from pandas.api.types import is_list_like
  from typing import List, Dict, Any, Union
 
 
- def test():
-     print("Hello World!")
-
-
  def get_runs(project: str = "openvalidators", filters: Dict[str, Any] = None, return_paths: bool = False) -> List:
      """Download runs from wandb.
 
@@ -78,6 +74,7 @@ def download_data(run_path: Union[str, List] = None, timeout: float = 600) -> pd
          pbar.set_postfix({"total_events": total_events})
 
      df = pd.concat(frames)
+
      # Convert timestamp to datetime.
      df._timestamp = pd.to_datetime(df._timestamp, unit="s")
      df.sort_values("_timestamp", inplace=True)
@@ -95,7 +92,7 @@ def load_data(path: str, nrows: int = None):
      list_cols = [c for c in df.columns if df[c].dtype == "object" and df[c].str.startswith("[").all()]
      # convert string representation of list to list
      df[list_cols] = df[list_cols].applymap(eval, na_action='ignore')
-
+
      return df
 
 
@@ -128,4 +125,4 @@ def get_list_col_lengths(df: pd.DataFrame) -> Dict[str, int]:
      if len(varying_lengths) > 0:
          print(f"The following columns have varying lengths: {varying_lengths}")
 
-     return {c: v[0] for c, v in list_col_lengths.items()}
+     return {c: v[0] for c, v in list_col_lengths.items() if v}
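The `if v` guard added to `get_list_col_lengths` skips list columns for which no lengths were recorded, where `v[0]` would raise an IndexError. A rough illustration, assuming `list_col_lengths` maps each column to its observed unique lengths:

# hypothetical unique-length map as built inside get_list_col_lengths
list_col_lengths = {'followup_uids': [2], 'always_empty': []}

lengths = {c: v[0] for c, v in list_col_lengths.items() if v}
print(lengths)  # {'followup_uids': 2}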
requirements.txt CHANGED
@@ -1,3 +1,5 @@
+ streamlit==1.23.1
+ streamlit-plotly-events==0.0.6
  wandb==0.15.3
  datasets==2.12.0
  plotly==5.14.1
setup.py ADDED
@@ -0,0 +1,93 @@
+ # The MIT License (MIT)
+ # Copyright © 2021 Yuma Rao
+
+ # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+ # documentation files (the “Software”), to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ # and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ # The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+ # the Software.
+
+ # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+ # THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ # DEALINGS IN THE SOFTWARE.
+ import pathlib
+ import pkg_resources
+ from setuptools import setup
+
+
+ def read(fname):
+     this_directory = pathlib.Path(__file__).parent
+     long_description = (this_directory / fname).read_text()
+     return long_description
+
+
+ def read_requirements(path):
+     with pathlib.Path(path).open() as requirements_txt:
+         return [str(requirement) for requirement in pkg_resources.parse_requirements(requirements_txt)]
+
+
+ def get_version(rel_path):
+     for line in read(rel_path).splitlines():
+         if line.startswith("__version__"):
+             delim = '"' if '"' in line else "'"
+             return line.split(delim)[1]
+     else:
+         raise RuntimeError("Unable to find version string.")
+
+
+ requirements = read_requirements("requirements.txt")
+
+
+ setup(
+     name="opendashboards",
+     version=get_version("opendashboards/__init__.py"),
+     description="Opendashboards is a collection of open source validators for the Bittensor Network.",
+     url="https://github.com/opentensor/opendashboards",
+     author="bittensor.com",
+     packages=["opendashboards"],
+     include_package_data=True,
+     author_email="",
+     license="MIT",
+     long_description=read("README.md"),
+     long_description_content_type="text/markdown",
+     # entry_points={
+     #     "console_scripts": ["foundation-validator = openvalidators.neuron:main"],
+     # },
+     install_requires=requirements,
+     python_requires=">=3.8",
+     classifiers=[
+         "Intended Audience :: Developers",
+         "Topic :: Software Development :: Build Tools",
+         "License :: OSI Approved :: MIT License",
+         "Programming Language :: Python :: 3 :: Only",
+         "Programming Language :: Python :: 3.8",
+         "Programming Language :: Python :: 3.9",
+         "Programming Language :: Python :: 3.10",
+         "Topic :: Scientific/Engineering",
+         "Topic :: Scientific/Engineering :: Mathematics",
+         "Topic :: Scientific/Engineering :: Artificial Intelligence",
+         "Topic :: Software Development",
+         "Topic :: Software Development :: Libraries",
+         "Topic :: Software Development :: Libraries :: Python Modules",
+     ],
+     maintainer="",
+     maintainer_email="",
+     keywords=[
+         "bittensor",
+         "validator",
+         "dashboard",
+         "analytics",
+         "ai",
+         "machine-learning",
+         "deep-learning",
+         "blockchain",
+         "pytorch",
+         "torch",
+         "neural-networks",
+         "cryptocurrency",
+     ],
+ )
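`get_version` avoids importing the package at build time by scanning `opendashboards/__init__.py` textually for the `__version__` assignment. The parsing step on its own:

line = '__version__ = "0.1.0"'

# split on whichever quote character the assignment uses
delim = '"' if '"' in line else "'"
print(line.split(delim)[1])  # 0.1.0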