Merge pull request #4 from opentensor/aggregations
- dashboard.py +100 -57
- meta_plotting.py +48 -0
- meta_utils.py +59 -0
- metadash.py +114 -0
- multigraph.py +112 -0
- multistats.py +348 -0
- opendashboards/assets/inspect.py +9 -13
- opendashboards/assets/io.py +128 -16
- opendashboards/assets/metric.py +58 -24
- opendashboards/assets/plot.py +30 -0
- opendashboards/utils/aggregate.py +52 -0
- opendashboards/utils/plotting.py +116 -35
dashboard.py
CHANGED
@@ -1,13 +1,16 @@
import time
import pandas as pd
import streamlit as st
from opendashboards.assets import io, inspect, metric, plot

# prompt-based completion score stats
# instrospect specific RUN-UID-COMPLETION
# cache individual file loads
# Hotkey churn

DEFAULT_PROJECT = "openvalidators"
DEFAULT_FILTERS = {"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
DEFAULT_SELECTED_HOTKEYS = None
DEFAULT_SRC = 'followup'
DEFAULT_COMPLETION_NTOP = 10
DEFAULT_UID_NTOP = 10
@@ -31,8 +34,6 @@ st.title('Validator :red[Analysis] Dashboard :eyes:')
st.markdown('#')
st.markdown('#')

with st.spinner(text=f'Checking wandb...'):
    df_runs = io.load_runs(project=DEFAULT_PROJECT, filters=DEFAULT_FILTERS, min_steps=10)
@@ -43,58 +44,83 @@ metric.wandb(df_runs)
st.markdown('#')
st.markdown('#')

tab1, tab2, tab3, tab4 = st.tabs(["Raw Data", "UID Health", "Completions", "Prompt-based scoring"])

### Wandb Runs ###
with tab1:

    st.markdown('#')
    st.subheader(":violet[Run] Data")
    with st.expander(f'Show :violet[raw] wandb data'):

        edited_df = st.data_editor(
            df_runs.assign(Select=False).set_index('Select'),
            column_config={"Select": st.column_config.CheckboxColumn(required=True)},
            disabled=df_runs.columns,
            use_container_width=True,
        )
        df_runs_subset = df_runs[edited_df.index==True]
        n_runs = len(df_runs_subset)

    if n_runs:
        df = io.load_data(df_runs_subset, load=True, save=True)
        df = inspect.clean_data(df)
        print(f'\nNans in columns: {df.isna().sum()}')
        df_long = inspect.explode_data(df)
    else:
        st.info(f'You must select at least one run to load data')
        st.stop()

    metric.runs(df_long)

    st.markdown('#')
    st.subheader(":violet[Event] Data")
    with st.expander(f'Show :violet[raw] event data for **{n_runs} selected runs**'):
        raw_data_col1, raw_data_col2 = st.columns(2)
        use_long_checkbox = raw_data_col1.checkbox('Use long format', value=True)
        num_rows = raw_data_col2.slider('Number of rows:', min_value=1, max_value=100, value=10, key='num_rows')
        st.dataframe(df_long.head(num_rows) if use_long_checkbox else df.head(num_rows),
                     use_container_width=True)

step_types = ['all']+['augment','followup','answer']#list(df.name.unique())

### UID Health ###
# TODO: Live time - time elapsed since moving_averaged_score for selected UID was 0 (lower bound so use >Time)
# TODO: Weight - Most recent weight for selected UID (Add warning if weight is 0 or most recent timestamp is not current)
with tab2:

    st.markdown('#')
    st.subheader("UID :violet[Health]")
    st.info(f"Showing UID health metrics for **{n_runs} selected runs**")

    uid_src = st.radio('Select event type:', step_types, horizontal=True, key='uid_src')
    df_uid = df_long[df_long.name.str.contains(uid_src)] if uid_src != 'all' else df_long

    metric.uids(df_uid, uid_src)
    uids = st.multiselect('UID:', sorted(df_uid['uids'].unique()), key='uid')
    with st.expander(f'Show UID health data for **{n_runs} selected runs** and **{len(uids)} selected UIDs**'):
        st.markdown('#')
        st.subheader(f"UID {uid_src.title()} :violet[Health]")
        agg_uid_checkbox = st.checkbox('Aggregate UIDs', value=True)
        if agg_uid_checkbox:
            metric.uids(df_uid, uid_src, uids)
        else:
            for uid in uids:
                st.caption(f'UID: {uid}')
                metric.uids(df_uid, uid_src, [uid])

        st.subheader(f'Cumulative completion frequency')

        freq_col1, freq_col2 = st.columns(2)
        freq_ntop = freq_col1.slider('Number of Completions:', min_value=10, max_value=1000, value=100, key='freq_ntop')
        freq_rm_empty = freq_col2.checkbox('Remove empty (failed)', value=True, key='freq_rm_empty')
        freq_cumulative = freq_col2.checkbox('Cumulative', value=False, key='freq_cumulative')
        freq_normalize = freq_col2.checkbox('Normalize', value=True, key='freq_normalize')

        plot.uid_completion_counts(df_uid, uids=uids, src=uid_src, ntop=freq_ntop, rm_empty=freq_rm_empty, cumulative=freq_cumulative, normalize=freq_normalize)

    with st.expander(f'Show UID **{uid_src}** leaderboard data for **{n_runs} selected runs**'):

        st.markdown('#')
        st.subheader(f"UID {uid_src.title()} :violet[Leaderboard]")
@@ -103,15 +129,15 @@ with tab2:
        uid_agg = uid_col2.selectbox('Aggregation:', ('mean','min','max','size','nunique'), key='uid_agg')

        plot.leaderboard(
            df_uid,
            ntop=uid_ntop,
            group_on='uids',
            agg_col='rewards',
            agg=uid_agg
        )

    with st.expander(f'Show UID **{uid_src}** diversity data for **{n_runs} selected runs**'):

        st.markdown('#')
        st.subheader(f"UID {uid_src.title()} :violet[Diversity]")
@@ -127,28 +153,27 @@ with tab3:
    completion_info = st.empty()

    msg_col1, msg_col2 = st.columns(2)
    # completion_src = msg_col1.radio('Select one:', ['followup', 'answer'], horizontal=True, key='completion_src')
    completion_src = st.radio('Select event type:', step_types, horizontal=True, key='completion_src')
    df_comp = df_long[df_long.name.str.contains(completion_src)] if completion_src != 'all' else df_long

    completion_info.info(f"Showing **{completion_src}** completions for **{n_runs} selected runs**")

    completion_ntop = msg_col2.slider('Top k:', min_value=1, max_value=50, value=DEFAULT_COMPLETION_NTOP, key='completion_ntop')

    completions = inspect.completions(df_long, 'completions')

    # Get completions with highest average rewards
    plot.leaderboard(
        df_comp,
        ntop=completion_ntop,
        group_on='completions',
        agg_col='rewards',
        agg='mean',
        alias=True
    )

    with st.expander(f'Show **{completion_src}** completion rewards data for **{n_runs} selected runs**'):

        st.markdown('#')
        st.subheader('Completion :violet[Rewards]')
@@ -157,19 +182,37 @@ with tab3:
        # completion_regex = st.text_input('Completion regex:', value='', key='completion_regex')

        plot.completion_rewards(
            df_comp,
            completion_col='completions',
            reward_col='rewards',
            uid_col='uids',
            ntop=completion_ntop,
            completions=completion_select,
        )
        # TODO: show the UIDs which have used the selected completions

    with st.expander(f'Show **{completion_src}** completion length data for **{n_runs} selected runs**'):

        st.markdown('#')
        st.subheader('Completion :violet[Length]')

        completion_length_radio = st.radio('Use: ', ['characters','words','sentences'], key='completion_length_radio')

        # Todo: use color to identify selected completions/ step names/ uids
        plot.completion_length_time(
            df_comp,
            completion_col='completions',
            uid_col='uids',
            time_col='completion_times',
            length_opt=completion_length_radio,
        )

### Prompt-based scoring ###
with tab4:
    # coming soon
    st.info('Prompt-based scoring coming soon')
    st.snow()

# st.dataframe(df_long_long.filter(regex=prompt_src).head())
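The run-selection pattern in the new tab1 code relies on st.data_editor returning the edited frame, whose boolean 'Select' index marks ticked rows. A minimal standalone sketch of that pattern (the toy dataframe is illustrative only):

import pandas as pd
import streamlit as st

df_runs = pd.DataFrame({'run_id': ['a1', 'b2'], 'num_steps': [120, 340]})

edited = st.data_editor(
    df_runs.assign(Select=False).set_index('Select'),
    column_config={"Select": st.column_config.CheckboxColumn(required=True)},
    disabled=df_runs.columns,  # original columns stay read-only; only the checkbox is editable
    use_container_width=True,
)
selected = df_runs[edited.index == True]  # keep rows whose checkbox was ticked
st.write(f'{len(selected)} runs selected')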
meta_plotting.py
ADDED
@@ -0,0 +1,48 @@
import numpy as np
import plotly.express as px

def plot_trace(df, col='emission', agg='mean', time_col='timestamp', ntop=10, hotkeys=None, hotkey_regex=None, abbrev=8, type='Miners'):

    if hotkeys is not None:
        df = df.loc[df.hotkey.isin(hotkeys)]
    if hotkey_regex is not None:
        df = df.loc[df.hotkey.str.contains(hotkey_regex)]

    top_miners = df.groupby('hotkey')[col].agg(agg).sort_values(ascending=False)

    stats = df.loc[df.hotkey.isin(top_miners.index[:ntop])].sort_values(by=time_col)

    stats['hotkey_abbrev'] = stats.hotkey.str[:abbrev]
    stats['coldkey_abbrev'] = stats.coldkey.str[:abbrev]
    stats['rank'] = stats.hotkey.map({k:i for i,k in enumerate(top_miners.index, start=1)})

    return px.line(stats.sort_values(by=[time_col,'rank']),
                   x=time_col, y=col, color='coldkey_abbrev', line_group='hotkey_abbrev',
                   hover_data=['hotkey','rank'],
                   labels={col:col.title(),'timestamp':'','coldkey_abbrev':f'Coldkey (first {abbrev} chars)','hotkey_abbrev':f'Hotkey (first {abbrev} chars)'},
                   title=f'Top {ntop} {type}, by {col.title()}',
                   template='plotly_white', width=800, height=600,
                   ).update_traces(opacity=0.7)


def plot_cabals(df, sel_col='coldkey', count_col='hotkey', time_col='timestamp', values=None, ntop=10, abbr=8):

    if values is None:
        values = df[sel_col].value_counts().sort_values(ascending=False).index[:ntop].tolist()
        print(f'Automatically selected {sel_col!r} = {values!r}')

    df = df.loc[df[sel_col].isin(values)]
    rates = df.groupby([time_col,sel_col])[count_col].nunique().reset_index()
    abbr_col = f'{sel_col} (first {abbr} chars)'
    rates[abbr_col] = rates[sel_col].str[:abbr]
    return px.line(rates.melt(id_vars=[time_col,sel_col,abbr_col]),
                   x=time_col, y='value', color=abbr_col,
                   #facet_col='variable', facet_col_wrap=1,
                   labels={'value':f'Number of Unique {count_col.title()}s per {sel_col.title()}','timestamp':''},
                   category_orders={abbr_col:[ v[:abbr] for v in values]},
                   # title=f'Unique {count_col.title()}s Associated with Selected {sel_col.title()}s in Metagraph',
                   title=f'Impact of Validators Update on Cabal',
                   width=800, height=600, template='plotly_white',
                   )
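Illustrative usage only, assuming df is a metagraph dataframe with 'hotkey', 'coldkey', 'timestamp' and 'emission' columns (e.g. the output of meta_utils.load_metagraphs below):

from meta_plotting import plot_trace, plot_cabals

fig = plot_trace(df, col='emission', agg='mean', ntop=10)  # top-10 miners by mean emission
fig.show()
fig2 = plot_cabals(df, sel_col='coldkey', count_col='hotkey', ntop=10)
fig2.show()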
meta_utils.py
ADDED
@@ -0,0 +1,59 @@
import os
import glob
import tqdm
import dill as pickle
import subprocess
import pandas as pd
import datetime
from functools import lru_cache

block_time_500k = datetime.datetime(2023, 5, 29, 5, 29, 0)
block_time_800k = datetime.datetime(2023, 7, 9, 21, 32, 48)
dt = (pd.Timestamp(block_time_800k)-pd.Timestamp(block_time_500k))/(800_000-500_000)

def run_subprocess(*args):
    # Trigger the multigraph.py script to run and save metagraph snapshots
    return subprocess.run('python multigraph.py'.split()+list(args),
                          shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)

def load_metagraph(path, extra_cols=None, rm_cols=None):

    with open(path, 'rb') as f:
        metagraph = pickle.load(f)

    df = pd.DataFrame(metagraph.axons)
    df['block'] = metagraph.block.item()
    df['timestamp'] = block_time_500k + dt*(df['block']-500_000)
    df['difficulty'] = metagraph.difficulty
    for c in extra_cols:
        vals = getattr(metagraph,c)
        df[c] = vals

    return df.drop(columns=rm_cols)

@lru_cache(maxsize=16)
def load_metagraphs(block_start, block_end, block_step=1000, datadir='data/metagraph/1/', extra_cols=None):

    if extra_cols is None:
        extra_cols = ['total_stake','ranks','incentive','emission','consensus','trust','validator_trust','dividends']

    blocks = range(block_start, block_end, block_step)
    print(f'Loading blocks {blocks[0]}-{blocks[-1]} from {datadir}')
    filenames = sorted(filename for filename in os.listdir(datadir) if int(filename.split('.')[0]) in blocks)
    print(f'Found {len(filenames)} files in {datadir}')

    metagraphs = []

    pbar = tqdm.tqdm(filenames)
    for filename in pbar:
        pbar.set_description(f'Processing {filename}')

        try:
            metagraph = load_metagraph(os.path.join(datadir, filename), extra_cols=extra_cols, rm_cols=['protocol','placeholder1','placeholder2'])

            metagraphs.append(metagraph)
        except Exception as e:
            print(f'filename {filename!r} generated an exception: { e }')

    return pd.concat(metagraphs)
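A minimal usage sketch, assuming pickled snapshots named <block>.pkl already exist under data/metagraph/1/ (as written by multigraph.py); the block numbers are illustrative:

from meta_utils import load_metagraphs

df = load_metagraphs(block_start=600_000, block_end=800_000, block_step=1000)
print(df.groupby('block').size())  # rows (axons) per snapshot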
metadash.py
ADDED
@@ -0,0 +1,114 @@
import os
import pandas as pd
import streamlit as st
from meta_utils import run_subprocess, load_metagraphs
# from opendashboards.assets import io, inspect, metric, plot
from meta_plotting import plot_trace, plot_cabals
import asyncio

## TODO: Read blocks from a big parquet file instead of loading all the pickles -- this is slow

def get_or_create_eventloop():
    try:
        return asyncio.get_event_loop()
    except RuntimeError as ex:
        if "There is no current event loop in thread" in str(ex):
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return asyncio.get_event_loop()

loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
import bittensor

datadir='data/metagraph/1/'
blockfiles = sorted(int(filename.split('.')[0]) for filename in os.listdir(datadir))
DEFAULT_SRC = 'miner'
DEFAULT_BLOCK_START = blockfiles[0]
DEFAULT_BLOCK_END = blockfiles[-1]
DEFAULT_BLOCK_STEP = 1000
DEFAULT_NTOP = 10
DEFAULT_UID_NTOP = 10

# Set app config
st.set_page_config(
    page_title='Validator Dashboard',
    menu_items={
        'Report a bug': "https://github.com/opentensor/dashboards/issues",
        'About': """
        This dashboard is part of the OpenTensor project. \n
        """
    },
    layout = "centered"
)

st.title('Metagraph :red[Analysis] Dashboard :eyes:')
# add vertical space
st.markdown('#')
st.markdown('#')

subtensor = bittensor.subtensor(network='finney')
current_block = subtensor.get_current_block()
current_difficulty = subtensor.difficulty(1, block=current_block)

bcol1, bcol2, bcol3 = st.columns([0.2, 0.6, 0.2])
with bcol1:
    st.metric('Current **block**', current_block, delta='+7200 [24hr]')
    # st.metric('Current **difficulty**', f'{current_difficulty/10e12:.0}T', delta='?')

block_start, block_end = bcol2.select_slider(
    'Select a **block range**',
    options=blockfiles,
    value=(DEFAULT_BLOCK_START, DEFAULT_BLOCK_END),
    format_func=lambda x: f'{x:,}'
)

bcol3.button('Refresh', on_click=run_subprocess)


with st.spinner(text=f'Loading data...'):
    # df = load_metagraphs(block_start=block_start, block_end=block_end, block_step=DEFAULT_BLOCK_STEP)
    df = pd.read_parquet('blocks_600100_807300_100')

blocks = df.block.unique()

df_sel = df.loc[df.block.between(block_start, block_end)]


# add vertical space
st.markdown('#')
st.markdown('#')

tab1, tab2, tab3, tab4 = st.tabs(["Overview", "Miners", "Validators", "Block"])

miner_choices = ['total_stake','ranks','incentive','emission','consensus','trust','validator_trust','dividends']
cabal_choices = ['hotkey','ip','coldkey']

### Overview ###
with tab1:

    x_col = st.radio('X-axis', ['block','timestamp'], index=0, horizontal=True)

    acol1, acol2 = st.columns([0.3, 0.7])
    sel_ntop = acol1.slider('Number:', min_value=1, max_value=50, value=10, key='sel_ntop')
    #horizontal list
    miner_choice = acol2.radio('Select:', miner_choices, horizontal=True, index=0)
    st.plotly_chart(
        plot_trace(df_sel, time_col=x_col,col=miner_choice, ntop=sel_ntop),
        use_container_width=True
    )

    col1, col2 = st.columns(2)
    count_col = col1.radio('Count', cabal_choices, index=0, horizontal=True)
    y_col = col2.radio('Agg on', cabal_choices, index=2, horizontal=True)

    st.plotly_chart(
        plot_cabals(df_sel, time_col=x_col, count_col=count_col, sel_col=y_col, ntop=sel_ntop),
        use_container_width=True
    )

with tab2:

    # plot of miner weights versus time/block
    pass
multigraph.py
ADDED
@@ -0,0 +1,112 @@
import os
import sys
import argparse
from traceback import print_exc
import pickle
import tqdm
import pandas as pd
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

import torch
import bittensor

#TODO: make line charts and other cool stuff for each metagraph snapshot

def process(block, netuid=1, lite=True, difficulty=False, prune_weights=False, return_graph=False, half=True, subtensor=None):

    if subtensor is None:
        subtensor = bittensor.subtensor(network='finney')

    try:
        metagraph = subtensor.metagraph(block=block, netuid=netuid, lite=lite)
        if difficulty:
            metagraph.difficulty = subtensor.difficulty(block=block, netuid=netuid)

        if not lite:
            if half:
                metagraph.weights = torch.nn.Parameter(metagraph.weights.half(), requires_grad=False)
            if prune_weights:
                metagraph.weights = metagraph.weights[metagraph.weights.sum(axis=1) > 0]

        with open(f'data/metagraph/{netuid}/{block}.pkl', 'wb') as f:
            pickle.dump(metagraph, f)

        return metagraph if return_graph else True

    except Exception as e:
        print(f'Error processing block {block}: {e}')


def parse_arguments():
    parser = argparse.ArgumentParser(description='Process metagraphs for a given network.')
    parser.add_argument('--netuid', type=int, default=1, help='Network UID to use.')
    parser.add_argument('--difficulty', action='store_true', help='Include difficulty in metagraph.')
    parser.add_argument('--prune_weights', action='store_true', help='Prune weights in metagraph.')
    parser.add_argument('--return_graph', action='store_true', help='Return metagraph instead of True.')
    parser.add_argument('--max_workers', type=int, default=32, help='Max workers to use.')
    parser.add_argument('--start_block', type=int, default=1_000_000, help='Start block.')
    parser.add_argument('--end_block', type=int, default=600_000, help='End block.')
    parser.add_argument('--step_size', type=int, default=100, help='Step size.')
    return parser.parse_args()

if __name__ == '__main__':

    subtensor = bittensor.subtensor(network='finney')
    print(f'Current block: {subtensor.block}')

    args = parse_arguments()

    netuid=args.netuid
    difficulty=args.difficulty
    overwrite=False
    return_graph=args.return_graph

    step_size = args.step_size
    start_block = args.start_block
    start_block = (min(subtensor.block, start_block)//step_size)*step_size # round to nearest step_size
    end_block = args.end_block
    blocks = range(start_block, end_block, -step_size)

    # only get weights for multiple of 500 blocks
    lite=lambda x: x%500!=0

    max_workers = min(args.max_workers, len(blocks))

    os.makedirs(f'data/metagraph/{netuid}', exist_ok=True)
    if not overwrite:
        blocks = [block for block in blocks if not os.path.exists(f'data/metagraph/{netuid}/{block}.pkl')]

    metagraphs = []

    if len(blocks)==0:
        print(f'No blocks to process. Current block: {subtensor.block}')
        quit()

    print(f'Processing {len(blocks)} blocks from {blocks[0]}-{blocks[-1]} using {max_workers} workers.')

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process, block, lite=lite(block), netuid=netuid, difficulty=difficulty)
            for block in blocks
        ]

        success = 0
        with tqdm.tqdm(total=len(futures)) as pbar:
            for block, future in zip(blocks,futures):
                try:
                    metagraphs.append(future.result())
                    success += 1
                except Exception as e:
                    print(f'generated an exception: {print_exc(e)}')
                pbar.update(1)
                pbar.set_description(f'Processed {success} blocks. Current block: {block}')

    if not success:
        raise ValueError('No blocks were successfully processed.')

    print(f'Processed {success} blocks.')
    if return_graph:
        for metagraph in metagraphs:
            print(f'{metagraph.block}: {metagraph.n.item()} nodes, difficulty={getattr(metagraph, "difficulty", None)}, weights={metagraph.weights.shape if hasattr(metagraph, "weights") else None}')

    print(metagraphs[-1])
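An illustrative invocation (flag names come from parse_arguments above; the block range and worker count are made up, not taken from the PR):

python multigraph.py --netuid 1 --start_block 800000 --end_block 600000 --step_size 100 --max_workers 16

Snapshots are written to data/metagraph/<netuid>/<block>.pkl, which is the directory that meta_utils.py and metadash.py read from.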
multistats.py
ADDED
@@ -0,0 +1,348 @@
import os
import re
import argparse
import tqdm
import wandb
import traceback
import plotly.express as px
import pandas as pd
from concurrent.futures import ProcessPoolExecutor

import opendashboards.utils.utils as utils
import opendashboards.utils.aggregate as aggregate

from IPython.display import display

api= wandb.Api(timeout=60)
wandb.login(anonymous="allow")

def pull_wandb_runs(project='openvalidators', filters=None, min_steps=50, max_steps=100_000, ntop=10, netuid=None, summary_filters=None ):
    # TODO: speed this up by storing older runs

    all_runs = api.runs(project, filters=filters)
    print(f'Using {ntop}/{len(all_runs)} runs with more than {min_steps} events')
    pbar = tqdm.tqdm(all_runs)
    runs = []
    n_events = 0
    successful = 0
    for i, run in enumerate(pbar):

        summary = run.summary
        if summary_filters is not None and not summary_filters(summary):
            continue
        if netuid is not None and run.config.get('netuid') != netuid:
            continue
        step = summary.get('_step',0)
        if step < min_steps or step > max_steps:
            # warnings.warn(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
            continue

        prog_msg = f'Loading data {successful/ntop*100:.0f}% ({successful}/{ntop} runs, {n_events} events)'
        pbar.set_description(f'{prog_msg}... **fetching** `{run.name}`')

        duration = summary.get('_runtime')
        end_time = summary.get('_timestamp')
        # extract values for selected tags
        rules = {'hotkey': re.compile('^[0-9a-z]{48}$',re.IGNORECASE), 'version': re.compile('^\\d\.\\d+\.\\d+$'), 'spec_version': re.compile('\\d{4}$')}
        tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
        # include bool flag for remaining tags
        tags.update({k: True for k in run.tags if k not in tags.keys() and k not in tags.values()})

        runs.append({
            'state': run.state,
            'num_steps': step,
            'num_completions': step*sum(len(v) for k, v in run.summary.items() if k.endswith('completions') and isinstance(v, list)),
            'entity': run.entity,
            'user': run.user.name,
            'username': run.user.username,
            'run_id': run.id,
            'run_name': run.name,
            'project': run.project,
            'run_url': run.url,
            'run_path': os.path.join(run.entity, run.project, run.id),
            'start_time': pd.to_datetime(end_time-duration, unit="s"),
            'end_time': pd.to_datetime(end_time, unit="s"),
            'duration': pd.to_timedelta(duration, unit="s").round('s'),
            'netuid': run.config.get('netuid'),
            **tags
        })
        n_events += step
        successful += 1
        if successful >= ntop:
            break

    return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})

def plot_gantt(df_runs):
    fig = px.timeline(df_runs,
                x_start="start_time", x_end="end_time", y="username", color="state",
                title="Timeline of Runs",
                category_orders={'run_name': df_runs.run_name.unique()},#,'username': sorted(df_runs.username.unique())},
                hover_name="run_name",
                hover_data=['hotkey','user','username','run_id','num_steps','num_completions'],
                color_discrete_map={'running': 'green', 'finished': 'grey', 'killed':'blue', 'crashed':'orange', 'failed': 'red'},
                opacity=0.3,
                width=1200,
                height=800,
                template="plotly_white",
    )
    fig.update_yaxes(tickfont_size=8, title='')
    fig.show()


def clean_data(df):
    return df.dropna(subset=df.filter(regex='completions|rewards').columns, how='any').dropna(axis=1, how='all')

def explode_data(df):
    list_cols = utils.get_list_col_lengths(df)
    return utils.explode_data(df, list(list_cols.keys())).apply(pd.to_numeric, errors='ignore')


def load_data(run_id, run_path=None, load=True, save=False, explode=True):

    file_path = os.path.join('data/runs/',f'history-{run_id}.parquet')

    if load and os.path.exists(file_path):
        df = pd.read_parquet(file_path)
        # filter out events with missing step length
        df = df.loc[df.step_length.notna()]

        # detect list columns which as stored as strings
        ignore_cols = ('moving_averaged_scores')
        list_cols = [c for c in df.columns if c not in ignore_cols and df[c].dtype == "object" and df[c].str.startswith("[").all()]
        # convert string representation of list to list
        # df[list_cols] = df[list_cols].apply(lambda x: eval(x, {'__builtins__': None}) if pd.notna(x) else x)
        try:
            df[list_cols] = df[list_cols].fillna('').applymap(eval, na_action='ignore')
        except ValueError as e:
            print(f'Error loading {file_path!r} when converting columns {list_cols} to list: {e}', flush=True)

    else:
        # Download the history from wandb and add metadata
        run = api.run(run_path)
        df = pd.DataFrame(list(run.scan_history()))

        # Remove rows with missing completions or rewards, which will be stuff related to weights
        df.dropna(subset=df.filter(regex='completions|rewards').columns, how='any', inplace=True)

        print(f'Downloaded {df.shape[0]} events from {run_path!r} with id {run_id!r}')

        # Clean and explode dataframe
        # overwrite object to free memory
        float_cols = df.filter(regex='reward').columns
        df = explode_data(clean_data(df)).astype({c: float for c in float_cols}).fillna({c: 0 for c in float_cols})

        if save:
            df.to_parquet(file_path, index=False)

    # Convert timestamp to datetime.
    df._timestamp = pd.to_datetime(df._timestamp, unit="s")
    return df.sort_values("_timestamp")


def calculate_stats(df_long, freq='H', save_path=None, ntop=3 ):

    df_long._timestamp = pd.to_datetime(df_long._timestamp)

    # if dataframe has columns such as followup_completions and answer_completions, convert to multiple rows
    if 'completions' not in df_long.columns:
        df_long.set_index(['_timestamp','run_id'], inplace=True)
        df_schema = pd.concat([
            df_long[['followup_completions','followup_rewards']].rename(columns={'followup_completions':'completions', 'followup_rewards':'rewards'}),
            df_long[['answer_completions','answer_rewards']].rename(columns={'answer_completions':'completions', 'answer_rewards':'rewards'})
        ])
        df_long = df_schema.reset_index()

    run_id = df_long['run_id'].iloc[0]
    # print(f'Calculating stats for run {run_id!r} dataframe with shape {df_long.shape}')

    # Approximate number of tokens in each completion
    df_long['completion_num_tokens'] = (df_long['completions'].astype(str).str.split().str.len() / 0.75).round()

    # TODO: use named aggregations
    reward_aggs = ['sum','mean','std','median','max',aggregate.nonzero_rate, aggregate.nonzero_mean, aggregate.nonzero_std, aggregate.nonzero_median]
    aggs = {
        'completions': ['nunique','count', aggregate.diversity, aggregate.successful_diversity, aggregate.success_rate],
        'completion_num_tokens': ['mean', 'std', 'median', 'max'],
        **{k: reward_aggs for k in df_long.filter(regex='reward') if df_long[k].nunique() > 1}
    }

    # Calculate tokens per second
    if 'completion_times' in df_long.columns:
        df_long['tokens_per_sec'] = df_long['completion_num_tokens']/(df_long['completion_times']+1e-6)
        aggs.update({
            'completion_times': ['mean','std','median','min','max'],
            'tokens_per_sec': ['mean','std','median','max'],
        })

    grouper = df_long.groupby(pd.Grouper(key='_timestamp', axis=0, freq=freq))
    # carry out main aggregations
    stats = grouper.agg(aggs)
    # carry out multi-column aggregations using apply
    diversity = grouper.apply(aggregate.successful_nonzero_diversity)
    # carry out top completions aggregations using apply
    top_completions = grouper.apply(aggregate.completion_top_stats, exclude='', ntop=ntop).unstack()

    # combine all aggregations, which have the same index
    stats = pd.concat([stats, diversity, top_completions], axis=1)

    # flatten multiindex columns
    stats.columns = ['_'.join([str(cc) for cc in c]) if isinstance(c, tuple) else str(c) for c in stats.columns]
    stats = stats.reset_index().assign(run_id=run_id)

    if save_path:
        stats.to_csv(save_path, index=False)

    return stats


def process(run, load=True, save=False, load_stats=True, freq='H', ntop=3):

    try:

        stats_path = f'data/aggs/stats-{run["run_id"]}.csv'
        if load_stats and os.path.exists(stats_path):
            print(f'Loaded stats file {stats_path!r}')
            return pd.read_csv(stats_path)

        # Load data and add extra columns from wandb run
        df_long = load_data(run_id=run['run_id'],
                            run_path=run['run_path'],
                            load=load,
                            save=save,
                            # save = (run['state'] != 'running') & run['end_time']
                            ).assign(**run.to_dict())
        assert isinstance(df_long, pd.DataFrame), f'Expected dataframe, but got {type(df_long)}'

        # Get and save stats
        return calculate_stats(df_long, freq=freq, save_path=stats_path, ntop=ntop)

    except Exception as e:
        print(f'Error processing run {run["run_id"]!r}:\t{e.__class__.__name__}: {e}',flush=True)
        print(traceback.format_exc())

def line_chart(df, col, title=None):
    title = title or col.replace('_',' ').title()
    fig = px.line(df.astype({'_timestamp':str}),
                x='_timestamp', y=col,
                line_group='run_id',
                title=f'{title} over Time',
                labels={'_timestamp':'', col: title, 'uids':'UID','value':'counts', 'variable':'Completions'},
                width=800, height=600,
                template='plotly_white',
                ).update_traces(opacity=0.2)

    fig.write_image(f'data/figures/{col}.png')
    fig.write_html(f'data/figures/{col}.html')
    return col


def parse_arguments():
    parser = argparse.ArgumentParser(description='Process wandb validator runs for a given netuid.')
    parser.add_argument('--load_runs',action='store_true', help='Load runs from file.')
    parser.add_argument('--repull_unfinished',action='store_true', help='Re-pull runs that were running when downloaded and saved.')
    parser.add_argument('--netuid', type=int, default=None, help='Network UID to use.')
    parser.add_argument('--ntop', type=int, default=1000, help='Number of runs to process.')
    parser.add_argument('--min_steps', type=int, default=100, help='Minimum number of steps to include.')
    parser.add_argument('--max_workers', type=int, default=32, help='Max workers to use.')
    parser.add_argument('--no_plot',action='store_true', help='Prevent plotting.')
    parser.add_argument('--no_save',action='store_true', help='Prevent saving data to file.')
    parser.add_argument('--no_load',action='store_true', help='Prevent loading downloaded data from file.')
    parser.add_argument('--no_load_stats',action='store_true', help='Prevent loading stats data from file.')
    parser.add_argument('--freq', type=str, default='H', help='Frequency to aggregate data.')
    parser.add_argument('--completions_ntop', type=int, default=3, help='Number of top completions to include in stats.')

    return parser.parse_args()


if __name__ == '__main__':

    # TODO: flag to overwrite runs that were running when downloaded and saved: check if file date is older than run end time.

    args = parse_arguments()
    print(args)

    filters = None# {"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
    # filters={'tags': {'$in': ['5F4tQyWrhfGVcNhoqeiNsR6KjD4wMZ2kfhLj4oHYuyHbZAc3']}} # Is foundation validator
    if args.load_runs and os.path.exists('data/wandb.csv'):
        df_runs = pd.read_csv('data/wandb.csv')
        assert len(df_runs) >= args.ntop, f'Loaded {len(df_runs)} runs, but expected at least {args.ntop}'
        df_runs = df_runs.iloc[:args.ntop]
    else:
        df_runs = pull_wandb_runs(ntop=args.ntop,
                                  min_steps=args.min_steps,
                                  netuid=args.netuid,
                                  filters=filters
                                  )#summary_filters=lambda s: s.get('augment_prompt'))
        df_runs.to_csv('data/wandb.csv', index=False)


    os.makedirs('data/runs/', exist_ok=True)
    os.makedirs('data/aggs/', exist_ok=True)
    os.makedirs('data/figures/', exist_ok=True)

    display(df_runs)
    if not args.no_plot:
        plot_gantt(df_runs)

    with ProcessPoolExecutor(max_workers=min(args.max_workers, df_runs.shape[0])) as executor:
        futures = [executor.submit(
                       process,
                       run,
                       load=not args.no_load,
                       save=not args.no_save,
                       load_stats=not args.no_load_stats,
                       freq=args.freq,
                       ntop=args.completions_ntop
                   )
                   for _, run in df_runs.iterrows()
                   ]

        # Use tqdm to add a progress bar
        results = []
        with tqdm.tqdm(total=len(futures)) as pbar:
            for future in futures:
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    print(f'-----------------------------\nWorker generated an exception in "process" function:\n{e.__class__.__name__}: {e}\n-----------------------------\n',flush=True)
                pbar.update(1)

    if not results:
        raise ValueError('No runs were successfully processed.')
    print(f'Processed {len(results)} runs.',flush=True)

    # Concatenate the results into a single dataframe
    df = pd.concat(results, ignore_index=True).sort_values(['_timestamp','run_id'], ignore_index=True)

    df.to_csv('data/processed.csv', index=False)
    print(f'Saved {df.shape[0]} rows to data/processed.csv')

    display(df)
    print(f'Unique values in columns:')
    display(df.nunique().sort_values())
    if not args.no_plot:

        plots = []

        cols = df.set_index(['run_id','_timestamp']).columns
        with ProcessPoolExecutor(max_workers=min(args.max_workers, len(cols))) as executor:
            futures = [executor.submit(line_chart, df, c) for c in cols]

            # Use tqdm to add a progress bar
            results = []
            with tqdm.tqdm(total=len(futures)) as pbar:
                for future in futures:
                    try:
                        result = future.result()
                        plots.append(result)
                    except Exception as e:
                        print(f'-----------------------------\nWorker generated an exception in "line_chart" function:\n{e.__class__.__name__}: {e}\n-----------------------------\n',flush=True)
                        # traceback.print_exc()
                    pbar.update(1)

        print(f'Saved {len(plots)} plots to data/figures/')
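An illustrative invocation (flag names come from parse_arguments above; the values are made up):

python multistats.py --netuid 1 --ntop 100 --min_steps 100 --freq H --completions_ntop 3

Per-run histories land in data/runs/, per-run aggregated stats in data/aggs/, and the combined frame in data/processed.csv, with figures written to data/figures/.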
opendashboards/assets/inspect.py
CHANGED
@@ -3,6 +3,9 @@ import streamlit as st
 import pandas as pd
 import opendashboards.utils.utils as utils
 
+def clean_data(df):
+    return df.dropna(subset=df.filter(regex='completions|rewards').columns, how='any')
+
 @st.cache_data
 def explode_data(df):
     list_cols = utils.get_list_col_lengths(df)
@@ -19,19 +22,9 @@ def explode_data(df):
 def completions(df_long, col):
     return df_long[col].value_counts()
 
-
-def weights(df, index='_timestamp'):
-    # Create a column for each UID and show most recent rows
-    scores = df['moving_averaged_scores'].apply(pd.Series).fillna(method='ffill')
-    if index in df.columns:
-        scores.index = df[index]
-
-    # rename columns
-    scores.rename({i: f'UID-{i}' for i in range(scores.shape[1])}, axis=1, inplace=True)
-    return scores
-
+
 def run_event_data(df_runs, df, selected_runs):
-
+
     st.markdown('#')
 
     show_col1, show_col2 = st.columns(2)
@@ -51,4 +44,7 @@ def run_event_data(df_runs, df, selected_runs):
         column_config={
             "url": st.column_config.LinkColumn("URL"),
         }
-    )
+    )
+
+def highlight_row(row, expr, color='lightgrey', bg_color='white'):
+    return [f'background-color:{color}' if expr else f'background-color:{bg_color}'] * len(row)
opendashboards/assets/io.py
CHANGED
@@ -5,19 +5,36 @@ import streamlit as st
|
|
5 |
|
6 |
import opendashboards.utils.utils as utils
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
@st.cache_data
|
10 |
def load_runs(project, filters, min_steps=10):
|
11 |
runs = []
|
|
|
|
|
|
|
12 |
msg = st.empty()
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
15 |
if step < min_steps:
|
16 |
msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
|
17 |
continue
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
21 |
# extract values for selected tags
|
22 |
rules = {'hotkey': re.compile('^[0-9a-z]{48}$',re.IGNORECASE), 'version': re.compile('^\\d\.\\d+\.\\d+$'), 'spec_version': re.compile('\\d{4}$')}
|
23 |
tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
|
@@ -27,17 +44,22 @@ def load_runs(project, filters, min_steps=10):
|
|
27 |
runs.append({
|
28 |
'state': run.state,
|
29 |
'num_steps': step,
|
|
|
30 |
'entity': run.entity,
|
31 |
-
'
|
32 |
-
'
|
33 |
'project': run.project,
|
34 |
'url': run.url,
|
35 |
-
'
|
36 |
'start_time': pd.to_datetime(end_time-duration, unit="s"),
|
37 |
'end_time': pd.to_datetime(end_time, unit="s"),
|
38 |
-
'duration': pd.
|
39 |
**tags
|
40 |
})
|
|
|
|
|
|
|
|
|
41 |
msg.empty()
|
42 |
return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})
|
43 |
|
@@ -56,7 +78,7 @@ def load_data(selected_runs, load=True, save=False):
|
|
56 |
run = selected_runs.loc[idx]
|
57 |
prog_msg = f'Loading data {i/len(selected_runs)*100:.0f}% ({successful}/{len(selected_runs)} runs, {n_events} events)'
|
58 |
|
59 |
-
file_path = os.path.join('data',f'history-{run.
|
60 |
|
61 |
if load and os.path.exists(file_path):
|
62 |
progress.progress(i/len(selected_runs),f'{prog_msg}... **reading** `{file_path}`')
|
@@ -67,18 +89,19 @@ def load_data(selected_runs, load=True, save=False):
|
|
67 |
st.exception(e)
|
68 |
continue
|
69 |
else:
|
70 |
-
progress.progress(i/len(selected_runs),f'{prog_msg}... **downloading** `{run.
|
71 |
try:
|
72 |
-
# Download the history from wandb
|
73 |
-
df = utils.download_data(run.
|
74 |
-
# Add metadata to the dataframe
|
75 |
-
df.assign(**run.to_dict())
|
76 |
|
|
|
|
|
|
|
77 |
if save and run.state != 'running':
|
78 |
df.to_csv(file_path, index=False)
|
79 |
# st.info(f'Saved history to {file_path}')
|
80 |
except Exception as e:
|
81 |
-
info.warning(f'Failed to download history for `{run.
|
82 |
st.exception(e)
|
83 |
continue
|
84 |
|
@@ -94,3 +117,92 @@ def load_data(selected_runs, load=True, save=False):
|
|
94 |
return pd.concat(frames)
|
95 |
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
import opendashboards.utils.utils as utils
|
7 |
|
8 |
+
from pandas.api.types import (
|
9 |
+
is_categorical_dtype,
|
10 |
+
is_datetime64_any_dtype,
|
11 |
+
is_numeric_dtype,
|
12 |
+
is_object_dtype,
|
13 |
+
)
|
14 |
+
|
15 |
|
16 |
@st.cache_data
|
17 |
def load_runs(project, filters, min_steps=10):
|
18 |
runs = []
|
19 |
+
n_events = 0
|
20 |
+
successful = 0
|
21 |
+
progress = st.progress(0, 'Fetching runs from wandb')
|
22 |
msg = st.empty()
|
23 |
+
|
24 |
+
all_runs = utils.get_runs(project, filters, api_key=st.secrets['WANDB_API_KEY'])
|
25 |
+
for i, run in enumerate(all_runs):
|
26 |
+
|
27 |
+
summary = run.summary
|
28 |
+
step = summary.get('_step',-1) + 1
|
29 |
if step < min_steps:
|
30 |
msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
|
31 |
continue
|
32 |
+
|
33 |
+
prog_msg = f'Loading data {i/len(all_runs)*100:.0f}% ({successful}/{len(all_runs)} runs, {n_events} events)'
|
34 |
+
progress.progress(i/len(all_runs),f'{prog_msg}... **fetching** `{run.name}`')
|
35 |
+
|
36 |
+
duration = summary.get('_runtime')
|
37 |
+
end_time = summary.get('_timestamp')
|
38 |
# extract values for selected tags
|
39 |
rules = {'hotkey': re.compile('^[0-9a-z]{48}$',re.IGNORECASE), 'version': re.compile('^\\d\.\\d+\.\\d+$'), 'spec_version': re.compile('\\d{4}$')}
|
40 |
tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
|
|
|
44 |
runs.append({
|
45 |
'state': run.state,
|
46 |
'num_steps': step,
|
47 |
+
'num_completions': step*sum(len(v) for k, v in run.summary.items() if k.endswith('completions') and isinstance(v, list)),
|
48 |
'entity': run.entity,
|
49 |
+
'run_id': run.id,
|
50 |
+
'run_name': run.name,
|
51 |
'project': run.project,
|
52 |
'url': run.url,
|
53 |
+
'run_path': os.path.join(run.entity, run.project, run.id),
|
54 |
'start_time': pd.to_datetime(end_time-duration, unit="s"),
|
55 |
'end_time': pd.to_datetime(end_time, unit="s"),
|
56 |
+
'duration': pd.to_timedelta(duration, unit="s").round('s'),
|
57 |
**tags
|
58 |
})
|
59 |
+
n_events += step
|
60 |
+
successful += 1
|
61 |
+
|
62 |
+
progress.empty()
|
63 |
msg.empty()
|
64 |
return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})
|
65 |
|
|
|
78 |
run = selected_runs.loc[idx]
|
79 |
prog_msg = f'Loading data {i/len(selected_runs)*100:.0f}% ({successful}/{len(selected_runs)} runs, {n_events} events)'
|
80 |
|
81 |
+
file_path = os.path.join('data',f'history-{run.run_id}.csv')
|
82 |
|
83 |
if load and os.path.exists(file_path):
|
84 |
progress.progress(i/len(selected_runs),f'{prog_msg}... **reading** `{file_path}`')
|
|
|
89 |
st.exception(e)
|
90 |
continue
|
91 |
else:
|
92 |
+
progress.progress(i/len(selected_runs),f'{prog_msg}... **downloading** `{run.run_path}`')
|
93 |
try:
|
94 |
+
# Download the history from wandb and add metadata
|
95 |
+
df = utils.download_data(run.run_path).assign(**run.to_dict())
|
|
|
|
|
96 |
|
97 |
+
print(f'Downloaded {df.shape[0]} events from `{run.run_path}`. Columns: {df.columns}')
|
98 |
+
df.info()
|
99 |
+
|
100 |
if save and run.state != 'running':
|
101 |
df.to_csv(file_path, index=False)
|
102 |
# st.info(f'Saved history to {file_path}')
|
103 |
except Exception as e:
|
104 |
+
info.warning(f'Failed to download history for `{run.run_path}`')
|
105 |
st.exception(e)
|
106 |
continue
|
107 |
|
|
|
117 |
return pd.concat(frames)
|

def filter_dataframe(df: pd.DataFrame, demo_selection=None) -> pd.DataFrame:
    """
    Adds a UI on top of a dataframe to let viewers filter columns

    Args:
        df (pd.DataFrame): Original dataframe
        demo_selection (pd.Index): Index of runs to select (if demo)

    Returns:
        pd.DataFrame: Filtered dataframe
    """
    filter_mode = st.sidebar.radio("Filter mode", ("Use demo", "Add filters"), index=0)

    run_msg = st.info("Select a single wandb run or compare multiple runs")

    if filter_mode == "Use demo":
        df = df.loc[demo_selection]
        run_msg.info(f"Selected {len(df)} runs")
        return df

    df = df.copy()

    # Try to convert datetimes into a standard format (datetime, no timezone)
    for col in df.columns:
        if is_object_dtype(df[col]):
            try:
                df[col] = pd.to_datetime(df[col])
            except Exception:
                pass

        if is_datetime64_any_dtype(df[col]):
            df[col] = df[col].dt.tz_localize(None)

    modification_container = st.container()

    with modification_container:
        to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
        for column in to_filter_columns:
            left, right = st.columns((1, 20))
            # Treat columns with < 10 unique values as categorical
            if is_categorical_dtype(df[column]) or df[column].nunique() < 10:
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    df[column].unique(),
                    default=list(df[column].unique()),
                )
                df = df[df[column].isin(user_cat_input)]
            elif is_numeric_dtype(df[column]):
                _min = float(df[column].min())
                _max = float(df[column].max())
                step = (_max - _min) / 100
                user_num_input = right.slider(
                    f"Values for {column}",
                    min_value=_min,
                    max_value=_max,
                    value=(_min, _max),
                    step=step,
                )
                df = df[df[column].between(*user_num_input)]
            elif is_datetime64_any_dtype(df[column]):
                user_date_input = right.date_input(
                    f"Values for {column}",
                    value=(
                        df[column].min(),
                        df[column].max(),
                    ),
                )
                if len(user_date_input) == 2:
                    user_date_input = tuple(map(pd.to_datetime, user_date_input))
                    start_date, end_date = user_date_input
                    df = df.loc[df[column].between(start_date, end_date)]
            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    df = df[df[column].astype(str).str.contains(user_text_input)]

    # Load data if new runs selected
    if len(df):
        run_msg.info(f"Selected {len(df)} runs")
    else:
        # open a dialog to select runs
        run_msg.error("Please select at least one run")
        # st.snow()
        # st.stop()

    return df
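A minimal usage sketch for the new `filter_dataframe` helper; the call site below is illustrative (not part of the diff) and is meant to run inside a `streamlit run` session, where the sidebar radio defaults to demo mode and the supplied index is used directly:

import pandas as pd
from opendashboards.assets import io

# Hypothetical runs table; in the dashboard this would be the output of io.load_runs
df_runs_demo = pd.DataFrame({'run_id': ['a', 'b', 'c'], 'num_steps': [10, 20, 30]})

# Keep only the first two runs via demo_selection (demo mode bypasses the sidebar filters)
df_selected = io.filter_dataframe(df_runs_demo, demo_selection=df_runs_demo.index[:2])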
opendashboards/assets/metric.py
CHANGED
@@ -2,6 +2,18 @@ import time
import pandas as pd
import streamlit as st

def fmt(number):
    units = ['', 'k', 'M', 'B']
    magnitude = 0
    while abs(number) >= 1000 and magnitude < len(units) - 1:
        magnitude += 1
        number /= 1000

    if units[magnitude]:
        return f'{number:.2f}{units[magnitude]}'
    else:
        return f'{number:.0f}{units[magnitude]}'

@st.cache_data
def wandb(df_runs):

    # get rows where start time is older than 24h ago
    df_runs_old = df_runs.loc[df_runs.start_time < pd.to_datetime(time.time()-24*60*60, unit='s')]

    col1, col2, col3, col4 = st.columns(4)

    # Convert to appropriate units e.g. 1.2k instead of 1200.
    col1.metric('Runs', fmt(df_runs.shape[0]), delta=fmt(df_runs.shape[0]-df_runs_old.shape[0])+' (24h)')
    col2.metric('Hotkeys', fmt(df_runs.hotkey.nunique()), delta=fmt(df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique())+' (24h)')
    col3.metric('Events', fmt(df_runs.num_steps.sum()), delta=fmt(df_runs.num_steps.sum()-df_runs_old.num_steps.sum())+' (24h)')
    col4.metric('Completions', fmt(df_runs.num_completions.sum()), delta=fmt(df_runs.num_completions.sum()-df_runs_old.num_completions.sum())+' (24h)')

    st.markdown('----')


@st.cache_data
def runs(df_long):

    col1, col2, col3, col4 = st.columns(4)
    print(df_long.columns)

    # Convert to appropriate units e.g. 1.2k instead of 1200.
    col1.metric('Runs', fmt(df_long.run_id.nunique()))
    col2.metric('Hotkeys', fmt(df_long.hotkey.nunique()))
    col3.metric('Events', fmt(df_long.groupby(['run_id','_step']).ngroups))
    col4.metric('Completions', fmt(df_long.shape[0]))

    name_type = df_long.name.apply(lambda x: x if not x[-1].isdigit() else x[:-1])
    aggs = df_long.groupby(name_type).agg({'uids': 'nunique', 'completions': 'nunique'})
    print(aggs)
    for i, c in enumerate(st.columns(len(aggs))):
        name = aggs.index[i].title()
        uid_unique, comp_unique = aggs.iloc[i]
        c.metric(label=f'{name} UIDs', value=uid_unique)
        c.metric(label=f'{name} Completions', value=comp_unique)

    st.markdown('----')


@st.cache_data
def uids(df_long, src, uids=None):

    nsfw_col = f'{src}_nsfw_scores'

    if uids:
        df_long = df_long.loc[df_long['uids'].isin(uids)]

    col1, col2, col3, col4 = st.columns(4)
    col1.metric(
        label="Success %",
        value=f'{df_long.loc[df_long["completions"].str.len() > 0].shape[0]/df_long.shape[0] * 100:.1f}',
        help='Number of successful completions divided by total number of events'
    )
    col2.metric(
        label="Diversity %",
        value=f'{df_long["completions"].nunique()/df_long.shape[0] * 100:.1f}',
        help='Number of unique completions divided by total number of events'
    )
    # uniqueness can be expressed as the average number of unique completions per uid divided by all unique completions
    # uniqueness is the shared completions between selected uids

    col3.metric(
        label="Uniqueness %",
        value=f'{df_long.groupby("uids")["completions"].nunique().mean()/df_long["completions"].nunique() * 100:.1f}',
        help='Average number of unique completions per uid divided by all unique completions'
    )
    col4.metric(
        label="Toxicity %",
        value=f'{df_long[nsfw_col].mean() * 100:.1f}' if nsfw_col in df_long.columns else '--',
        help='Average toxicity score of all events'
    )
    st.markdown('----')
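The new `fmt` helper above drives the human-readable metric values. A quick sanity check of its behaviour, worked out by hand from the function body (not taken from the diff):

from opendashboards.assets.metric import fmt

# Below 1000 there is no suffix and no decimals; above, values get two decimals and a unit
assert fmt(999) == '999'
assert fmt(1234) == '1.23k'
assert fmt(2_500_000) == '2.50M'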
opendashboards/assets/plot.py
CHANGED
@@ -8,6 +8,8 @@ def uid_diversty(df, rm_failed=True):
        plotting.plot_uid_diversty(
            df,
            remove_unsuccessful=rm_failed
        ).update_layout(
            coloraxis_showscale=False,
        ),
        use_container_width=True
    )
@@ -22,6 +24,8 @@ def leaderboard(df, ntop, group_on, agg_col, agg, alias=False):
            agg_col=agg_col,
            agg=agg,
            alias=alias
        ).update_layout(
            coloraxis_showscale=False,
        ),
        use_container_width=True
    )
@@ -49,4 +53,30 @@ def weights(df, uids, ntop=10):
            ntop=ntop
        ),
        use_container_width=True
    )

def completion_length_time(df, completion_col, uid_col, time_col, length_opt='characters'):
    return st.plotly_chart(
        plotting.plot_completion_length_time(
            df,
            uid_col=uid_col,
            completion_col=completion_col,
            time_col=time_col,
            length_opt=length_opt
        ),
        use_container_width=True
    )

def uid_completion_counts(df, uids, src, rm_empty, ntop=100, cumulative=False, normalize=True):
    return st.plotly_chart(
        plotting.plot_uid_completion_counts(
            df,
            uids=uids,
            src=src,
            rm_empty=rm_empty,
            ntop=ntop,
            cumulative=cumulative,
            normalize=normalize
        ),
        use_container_width=True
    )
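The two new wrappers follow the same call pattern as the existing plot helpers. A hypothetical call from a dashboard tab (the `df_long` name and column values are assumptions based on how the rest of this PR names the exploded event log, not part of the diff):

# Illustrative only: df_long is the exploded event dataframe used elsewhere in the dashboard
plot.completion_length_time(df_long, completion_col='completions', uid_col='uids', time_col='completion_times')
plot.uid_completion_counts(df_long, uids=[1, 2, 3], src='answer', rm_empty=True, ntop=50, cumulative=True)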
opendashboards/utils/aggregate.py
ADDED
@@ -0,0 +1,52 @@
import pandas as pd

def diversity(x):
    return x.nunique()/len(x) if len(x)>0 else 0

def _nonempty(x):
    return x[x.astype(str).str.len()>0]

def successful_diversity(x):
    return diversity(_nonempty(x))

def success_rate(x):
    return len(_nonempty(x))/len(x) if len(x)>0 else 0

def threshold_rate(x, threshold):
    return (x>threshold).sum()/len(x)

def successful_nonzero_diversity(x):
    # To be used with groupby.apply
    return pd.Series({'completions_successful_nonzero_diversity': successful_diversity(x.loc[x['rewards']>0,'completions'])})

def completion_top_stats(x, exclude=None, ntop=1):
    # To be used with groupby.apply
    vc = x['completions'].value_counts()
    if exclude is not None:
        vc.drop(exclude, inplace=True, errors='ignore')

    rewards = x.loc[x['completions'].isin(vc.index[:ntop])].groupby('completions').rewards.agg(['mean','std','max'])
    return pd.DataFrame({
        'completions_top': rewards.index.tolist(),
        'completions_freq': vc.values[:ntop],
        'completions_reward_mean': rewards['mean'].values,
        'completions_reward_std': rewards['std'].values
    })

def top(x, i=0, exclude=''):
    return _nonempty(x).value_counts().drop(exclude, errors='ignore').index[i]

def freq(x, i=0, exclude=''):
    return _nonempty(x).value_counts().drop(exclude, errors='ignore').values[i]

def nonzero_rate(x):
    return (x>0).sum()/len(x)

def nonzero_mean(x):
    return x[x>0].mean()

def nonzero_std(x):
    return x[x>0].std()

def nonzero_median(x):
    return x[x>0].median()
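These helpers are written to slot into pandas groupby reductions: the Series-wise ones go into `.agg()`, the frame-wise ones (marked "To be used with groupby.apply") into `.apply()`. A small illustrative sketch with made-up data; the `uids`/`completions`/`rewards` column names match the ones used throughout this PR:

import pandas as pd
from opendashboards.utils import aggregate

df = pd.DataFrame({
    'uids':        [1, 1, 2, 2],
    'completions': ['foo', '', 'foo', 'bar'],
    'rewards':     [0.9, 0.0, 0.4, 0.7],
})

# Series-wise reductions via named aggregation
stats = df.groupby('uids').agg(
    success_rate=('completions', aggregate.success_rate),
    diversity=('completions', aggregate.successful_diversity),
    nonzero_reward=('rewards', aggregate.nonzero_mean),
)

# Frame-wise reduction via apply
nonzero_div = df.groupby('uids').apply(aggregate.successful_nonzero_diversity)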
opendashboards/utils/plotting.py
CHANGED
@@ -44,7 +44,7 @@ def plot_throughput(df: pd.DataFrame, n_minutes: int = 10) -> go.Figure:

def plot_weights(scores: pd.DataFrame, ntop: int = 20, uids: List[Union[str, int]] = None) -> go.Figure:
    """Plot weights of uids.

    Args:
        scores (pd.DataFrame): Dataframe of scores. Should be indexed by timestamp and have one column per uid.
@@ -62,16 +62,16 @@
    ).update_traces(opacity=0.7)


def plot_uid_diversty(df: pd.DataFrame, x: str = 'followup', y: str = 'answer', remove_unsuccessful: bool = False) -> go.Figure:
    """Plot uid diversity as measured by ratio of unique to total completions.

    Args:
        df (pd.DataFrame): Dataframe of event log.
    """
    return px.scatter(x=[1,2,3], y=[1,2,3])
    xrows = df.loc[df.name.str.contains(x)]
    yrows = df.loc[df.name.str.contains(y)]
    df = pd.merge(xrows, yrows, on='uid', suffixes=('_followup', '_answer'))

    df = df[list_cols].explode(column=list_cols)
    if remove_unsuccessful:
@@ -88,7 +88,7 @@
        frames.append(frame)

    merged = pd.merge(*frames, left_index=True, right_index=True, suffixes=("_followup", "_answer"))
    merged["reward_mean"] = merged.filter(regex="rewards_mean").mean(axis=1).astype(float)

    merged.index.name = "UID"
    merged.reset_index(inplace=True)
@@ -97,8 +97,8 @@
        merged,
        x="diversity_followup",
        y="diversity_answer",
        opacity=0.35,
        # size="completions_size",
        color="reward_mean",
        hover_data=["UID"] + merged.columns.tolist(),
        marginal_x="histogram",
@@ -112,7 +112,7 @@

def plot_completion_rates(
    df: pd.DataFrame,
    msg_col: str = "completions",
    time_interval: str = "H",
    time_col: str = "_timestamp",
    ntop: int = 20,
@@ -123,7 +123,7 @@

    Args:
        df (pd.DataFrame): Dataframe of event log.
        msg_col (str, optional): List-like column containing completions. Defaults to 'completions'.
        time_interval (str, optional): Pandas time interval. Defaults to 'H'. See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
        time_col (str, optional): Column containing timestamps as pd.Datetime. Defaults to '_timestamp'.
        ntop (int, optional): Number of completions to plot. Defaults to 20.
@@ -163,10 +163,10 @@

def plot_completion_rewards(
    df: pd.DataFrame,
    msg_col: str = "completions",
    reward_col: str = "rewards",
    time_col: str = "_timestamp",
    uid_col: str = "uids",
    ntop: int = 3,
    completions: List[str] = None,
    completion_regex: str = None,
@@ -175,9 +175,10 @@

    Args:
        df (pd.DataFrame): Dataframe of event log.
        msg_col (str, optional): List-like column containing completions. Defaults to 'completions'.
        reward_col (str, optional): List-like column containing rewards. Defaults to 'rewards'.
        time_col (str, optional): Column containing timestamps as pd.Datetime. Defaults to '_timestamp'.
        uid_col (str, optional): Column containing UIDs. Defaults to 'uids'.
        ntop (int, optional): Number of completions to plot. Defaults to 20.
        completions (List[str], optional): List of completions to plot. Defaults to None.
        completion_regex (str, optional): Regex to match completions. Defaults to None.
@@ -198,7 +199,11 @@
        else:
            completions = completion_counts.index[:ntop]
            print(f"Using top {len(completions)} completions: \n{completions}")
    else:
        found_completions = [c for c in completions if c in completion_counts.index]
        print(f"Using {len(found_completions)}/{len(completions)} completions: \n{found_completions}")
        completions = found_completions

    # Get ranks of completions in terms of number of occurrences
    ranks = completion_counts.rank(method="dense", ascending=False).loc[completions].astype(int)
@@ -219,14 +224,14 @@
        labels={"rank": "Rank", reward_col: "Reward", time_col: ""},
        title=f"Rewards for {len(completions)} Messages",
        **plotly_config,
        opacity=0.35,
    )


def plot_leaderboard(
    df: pd.DataFrame,
    group_on: str = "uids",
    agg_col: str = "rewards",
    agg: str = "mean",
    ntop: int = 10,
    alias: bool = False,
@@ -235,44 +240,44 @@

    Args:
        df (pd.DataFrame): Dataframe of event log.
        group_on (str, optional): Entities to use for grouping. Defaults to 'uids'.
        agg_col (str, optional): Column to aggregate. Defaults to 'rewards'.
        agg (str, optional): Aggregation function. Defaults to 'mean'.
        ntop (int, optional): Number of entities to plot. Defaults to 10.
        alias (bool, optional): Whether to use aliases for indices. Defaults to False.
    """
    df = df[[group_on, agg_col]].explode(column=[group_on, agg_col])

    rankings = df.groupby(group_on)[agg_col].agg(agg).sort_values(ascending=False).head(ntop).astype(float)
    if alias:
        index = rankings.index.map({name: str(i) for i, name in enumerate(rankings.index)})
    else:
        index = rankings.index.astype(str)

    return px.bar(
        x=rankings,
        y=index,
        color=rankings,
        orientation="h",
        labels={"x": f"{agg_col.title()}", "y": group_on, "color": ""},
        title=f"Leaderboard for {agg_col}, top {ntop} {group_on}",
        color_continuous_scale="BlueRed",
        opacity=0.35,
        hover_data=[rankings.index.astype(str)],
        **plotly_config,
    )


def plot_dendrite_rates(
    df: pd.DataFrame, uid_col: str = "uids", reward_col: str = "rewards", ntop: int = 20, uids: List[int] = None
) -> go.Figure:
    """Makes a bar chart of the success rate of dendrite calls for a given set of uids.

    Args:
        df (pd.DataFrame): Dataframe of event log.
        uid_col (str, optional): Column containing uids. Defaults to 'uids'.
        reward_col (str, optional): Column containing rewards. Defaults to 'rewards'.
        ntop (int, optional): Number of uids to plot. Defaults to 20.
        uids (List[int], optional): List of uids to plot. Defaults to None.

@@ -297,15 +302,91 @@
        barmode="group",
        title="Dendrite Calls by UID",
        color_continuous_scale="Blues",
        opacity=0.35,
        **plotly_config,
    )

def plot_completion_length_time(
    df: pd.DataFrame,
    uid_col: str = "uids",
    completion_col: str = "completions",
    time_col: str = "completion_times",
    uids: List[int] = None,
    length_opt: str = 'characters',
) -> go.Figure:

    df = df[[uid_col, completion_col, time_col]].explode(column=[uid_col, completion_col, time_col])
    df["time"] = df[time_col].astype(float)
    if uids is not None:
        df = df.loc[df[uid_col].isin(uids)]

    if length_opt == 'characters':
        df["completion_length"] = df[completion_col].str.len()
    elif length_opt == 'words':
        df["completion_length"] = df[completion_col].str.split().str.len()
    elif length_opt == 'sentences':
        df["completion_length"] = df[completion_col].str.split('.').str.len()
    else:
        raise ValueError(f"length_opt must be one of 'words', 'characters', or 'sentences', got {length_opt}")

    return px.scatter(
        df,
        x='completion_length',
        y='time',
        color=uid_col if uids is not None else None,
        labels={"completion_length": f"Completion Length, {length_opt.title()}", "time": "Time (s)"},
        title=f"Completion Length vs Time, {length_opt.title()}",
        marginal_x="histogram",
        marginal_y="histogram",
        hover_data=[uid_col, completion_col],
        opacity=0.35,
        **plotly_config,
    )

def plot_uid_completion_counts(
    df: pd.DataFrame,
    uids: List[int],
    src: str = 'answer',
    rm_empty: bool = True,
    ntop: int = 100,
    cumulative: bool = False,
    normalize: bool = True,
) -> go.Figure:

    completion_col = f'completions'
    uid_col = f'uids'
    if rm_empty:
        df = df.loc[df[completion_col].str.len()>0]

    df = df.loc[df[uid_col].isin(uids)]

    g = df.groupby(uid_col)[completion_col].value_counts(normalize=normalize).reset_index(level=1)
    y_col = g.columns[-1]

    # rescale each group to have a max of 1 if normalize is True
    if cumulative:
        g[y_col] = g.groupby(level=0)[y_col].cumsum().transform(lambda x: x/x.max() if normalize else x)

    # get top n completions
    g = g.groupby(level=0).head(ntop)

    # create a rank column which increments by one and resets when the uid changes
    g['rank'] = g.groupby(level=0).cumcount()+1

    return px.line(g.sort_index().reset_index(),
        x='rank', y=y_col, color=uid_col,
        labels={'rank':'Top Completions', uid_col:'UID', y_col:y_col.replace('_',' ').title()},
        title=f'{src.title()} Completion {y_col.replace("_"," ").title()}s by Rank',
        **plotly_config,
    ).update_traces(opacity=0.7)


def plot_network_embedding(
    df: pd.DataFrame,
    uid_col: str = "uids",
    completion_col: str = "completions",
    ntop: int = 1,
    uids: List[int] = None,
) -> go.Figure:
@@ -314,8 +395,8 @@
    Args:
        df (pd.DataFrame): Dataframe of event log.

        uid_col (str, optional): Column containing uids. Defaults to 'uids'.
        completion_col (str, optional): Column containing completions. Defaults to 'completions'.
        ntop (int, optional): Number of uids to plot. Defaults to 20.
        hover_data (List[str], optional): Columns to include in hover data. Defaults to None.
        uids (List[int], optional): List of uids to plot. Defaults to None.
@@ -358,6 +439,6 @@
        title=f"Graph for Top {ntop} Completion Similarities",
        color_continuous_scale="BlueRed",
        hover_data=["UID", "top_completions"],
        opacity=0.35,
        **plotly_config,
    )
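As a closing illustration, the new `plot_completion_length_time` function can be exercised on a tiny synthetic event log. The data below is entirely made up and only documents the expected list-like column layout (one event per row, parallel lists of uids, completions, and timings); it assumes pandas and plotly are installed:

import pandas as pd
from opendashboards.utils import plotting

# Each row is one event; the uid/completion/timing columns hold equal-length lists,
# mirroring the wandb history format used throughout the dashboard
events = pd.DataFrame({
    'uids':             [[1, 2], [1, 3]],
    'completions':      [['foo bar', 'bar'], ['baz qux quux', 'foo']],
    'completion_times': [[0.5, 1.2], [0.8, 2.0]],
})

fig = plotting.plot_completion_length_time(events, length_opt='words')
fig.show()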