steffenc committed on
Commit
97fcb64
·
1 Parent(s): ac84ae9
dashboard.py CHANGED
@@ -1,13 +1,16 @@
 
 
1
  import streamlit as st
2
  from opendashboards.assets import io, inspect, metric, plot
3
 
4
- # dendrite time versus completion length
5
  # prompt-based completion score stats
6
  # introspect specific RUN-UID-COMPLETION
 
7
 
8
  DEFAULT_PROJECT = "openvalidators"
9
  DEFAULT_FILTERS = {"tags": {"$in": ["1.0.0", "1.0.1", "1.0.2", "1.0.3", "1.0.4"]}}
10
  DEFAULT_SELECTED_RUNS = ['kt9bzxii']
 
11
  DEFAULT_SRC = 'followup'
12
  DEFAULT_COMPLETION_NTOP = 10
13
  DEFAULT_UID_NTOP = 10
@@ -31,59 +34,80 @@ st.title('Validator :red[Analysis] Dashboard :eyes:')
31
  st.markdown('#')
32
  st.markdown('#')
33
 
34
- # with st.sidebar:
35
- # st.sidebar.header('Pages')
36
 
37
  with st.spinner(text=f'Checking wandb...'):
38
  df_runs = io.load_runs(project=DEFAULT_PROJECT, filters=DEFAULT_FILTERS, min_steps=10)
39
 
 
 
 
 
 
 
 
 
 
 
40
  metric.wandb(df_runs)
41
 
 
42
  # add vertical space
43
  st.markdown('#')
44
  st.markdown('#')
45
 
46
- tab1, tab2, tab3, tab4 = st.tabs(["Wandb Runs", "UID Health", "Completions", "Prompt-based scoring"])
47
-
48
 
49
  ### Wandb Runs ###
50
  with tab1:
51
 
52
  st.markdown('#')
53
- st.header(":violet[Wandb] Runs")
54
-
55
- run_msg = st.info("Select a single run or compare multiple runs")
56
- selected_runs = st.multiselect(f'Runs ({len(df_runs)})', df_runs.id, default=DEFAULT_SELECTED_RUNS, key='runs')
 
 
 
 
 
 
 
 
 
57
 
58
- # Load data if new runs selected
59
- if not selected_runs:
60
- # open a dialog to select runs
61
- run_msg.error("Please select at least one run")
62
- st.snow()
 
63
  st.stop()
64
 
65
- df = io.load_data(df_runs.loc[df_runs.id.isin(selected_runs)], load=True, save=True)
66
- df_long = inspect.explode_data(df)
67
- df_weights = inspect.weights(df)
68
 
69
- metric.runs(df, df_long, selected_runs)
70
-
71
- with st.expander(f'Show :violet[raw] data for {len(selected_runs)} selected runs'):
72
- inspect.run_event_data(df_runs,df, selected_runs)
 
 
 
 
 
73
 
74
 
75
  ### UID Health ###
76
  with tab2:
77
 
78
  st.markdown('#')
79
- st.header("UID :violet[Health]")
80
- st.info(f"Showing UID health metrics for **{len(selected_runs)} selected runs**")
81
 
82
  uid_src = st.radio('Select one:', ['followup', 'answer'], horizontal=True, key='uid_src')
83
 
84
  metric.uids(df_long, uid_src)
85
 
86
- with st.expander(f'Show UID **{uid_src}** weights data for **{len(selected_runs)} selected runs**'):
87
 
88
  uids = st.multiselect('UID:', sorted(df_long[f'{uid_src}_uids'].unique()), key='uid')
89
  st.markdown('#')
@@ -93,8 +117,8 @@ with tab2:
93
  df_weights,
94
  uids=uids,
95
  )
96
-
97
- with st.expander(f'Show UID **{uid_src}** leaderboard data for **{len(selected_runs)} selected runs**'):
98
 
99
  st.markdown('#')
100
  st.subheader(f"UID {uid_src.title()} :violet[Leaderboard]")
@@ -111,7 +135,7 @@ with tab2:
111
  )
112
 
113
 
114
- with st.expander(f'Show UID **{uid_src}** diversity data for **{len(selected_runs)} selected runs**'):
115
 
116
  st.markdown('#')
117
  st.subheader(f"UID {uid_src.title()} :violet[Diversity]")
@@ -128,13 +152,14 @@ with tab3:
128
 
129
  msg_col1, msg_col2 = st.columns(2)
130
  completion_src = msg_col1.radio('Select one:', ['followup', 'answer'], horizontal=True, key='completion_src')
131
- completion_info.info(f"Showing **{completion_src}** completions for **{len(selected_runs)} selected runs**")
132
-
133
  completion_ntop = msg_col2.slider('Top k:', min_value=1, max_value=50, value=DEFAULT_COMPLETION_NTOP, key='completion_ntop')
134
 
135
  completion_col = f'{completion_src}_completions'
136
  reward_col = f'{completion_src}_rewards'
137
  uid_col = f'{completion_src}_uids'
 
138
 
139
  completions = inspect.completions(df_long, completion_col)
140
 
@@ -148,7 +173,7 @@ with tab3:
148
  alias=True
149
  )
150
 
151
- with st.expander(f'Show **{completion_src}** completion rewards data for **{len(selected_runs)} selected runs**'):
152
 
153
  st.markdown('#')
154
  st.subheader('Completion :violet[Rewards]')
@@ -166,10 +191,26 @@ with tab3:
166
  )
167
 
168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  ### Prompt-based scoring ###
170
  with tab4:
171
  # coming soon
172
  st.info('Prompt-based scoring coming soon')
 
173
 
174
  # st.dataframe(df_long_long.filter(regex=prompt_src).head())
175
 
 
1
+ import time
2
+ import pandas as pd
3
  import streamlit as st
4
  from opendashboards.assets import io, inspect, metric, plot
5
 
 
6
  # prompt-based completion score stats
7
  # introspect specific RUN-UID-COMPLETION
8
+ # cache individual file loads
9
 
10
  DEFAULT_PROJECT = "openvalidators"
11
  DEFAULT_FILTERS = {"tags": {"$in": ["1.0.0", "1.0.1", "1.0.2", "1.0.3", "1.0.4"]}}
12
  DEFAULT_SELECTED_RUNS = ['kt9bzxii']
13
+ DEFAULT_SELECTED_HOTKEYS = None
14
  DEFAULT_SRC = 'followup'
15
  DEFAULT_COMPLETION_NTOP = 10
16
  DEFAULT_UID_NTOP = 10
 
34
  st.markdown('#')
35
  st.markdown('#')
36
 
 
 
37
 
38
  with st.spinner(text=f'Checking wandb...'):
39
  df_runs = io.load_runs(project=DEFAULT_PROJECT, filters=DEFAULT_FILTERS, min_steps=10)
40
 
41
+
42
+ ### Wandb Runs ###
43
+ with st.sidebar:
44
+
45
+ st.markdown('#')
46
+ st.sidebar.header(":violet[Select] Runs")
47
+
48
+ df_runs_subset = io.filter_dataframe(df_runs, demo_selection=df_runs.id.isin(DEFAULT_SELECTED_RUNS))
49
+ n_runs = len(df_runs_subset)
50
+
51
  metric.wandb(df_runs)
52
 
53
+
54
  # add vertical space
55
  st.markdown('#')
56
  st.markdown('#')
57
 
58
+ tab1, tab2, tab3, tab4 = st.tabs(["Raw Data", "UID Health", "Completions", "Prompt-based scoring"])
 
59
 
60
  ### Wandb Runs ###
61
  with tab1:
62
 
63
  st.markdown('#')
64
+ st.subheader(":violet[Run] Data")
65
+ with st.expander(f'Show :violet[raw] wandb data'):
66
+
67
+ filter_selected_checkbox = st.checkbox('Filter to selected runs', value=True)
68
+ df_to_show = df_runs_subset if filter_selected_checkbox else df_runs
69
+
70
+ # TODO: make this editable so that runs can be selected directly from the table
71
+ st.dataframe(
72
+ df_to_show.assign(
73
+ Selected=df_to_show.index.isin(df_runs_subset.index)
74
+ ).set_index('Selected').sort_index(ascending=False),#.style.highlight_max(subset=df_runs_subset.index, color='lightgreen', axis=1),
75
+ use_container_width=True,
76
+ )
77
 
78
+ if n_runs:
79
+ df = io.load_data(df_runs_subset, load=True, save=True)
80
+ df_long = inspect.explode_data(df)
81
+ df_weights = inspect.weights(df)
82
+ else:
83
+ st.info(f'You must select at least one run to load data')
84
  st.stop()
85
 
86
+ metric.runs(df_long)
 
 
87
 
88
+ st.markdown('#')
89
+ st.subheader(":violet[Event] Data")
90
+ with st.expander(f'Show :violet[raw] event data for **{n_runs} selected runs**'):
91
+ raw_data_col1, raw_data_col2 = st.columns(2)
92
+ use_long_checkbox = raw_data_col1.checkbox('Use long format', value=True)
93
+ num_rows = raw_data_col2.slider('Number of rows:', min_value=1, max_value=100, value=10, key='num_rows')
94
+ st.dataframe(df_long.head(num_rows) if use_long_checkbox else df.head(num_rows),
95
+ use_container_width=True)
96
+
97
 
98
 
99
  ### UID Health ###
100
  with tab2:
101
 
102
  st.markdown('#')
103
+ st.subheader("UID :violet[Health]")
104
+ st.info(f"Showing UID health metrics for **{n_runs} selected runs**")
105
 
106
  uid_src = st.radio('Select one:', ['followup', 'answer'], horizontal=True, key='uid_src')
107
 
108
  metric.uids(df_long, uid_src)
109
 
110
+ with st.expander(f'Show UID **{uid_src}** weights data for **{n_runs} selected runs**'):
111
 
112
  uids = st.multiselect('UID:', sorted(df_long[f'{uid_src}_uids'].unique()), key='uid')
113
  st.markdown('#')
 
117
  df_weights,
118
  uids=uids,
119
  )
120
+
121
+ with st.expander(f'Show UID **{uid_src}** leaderboard data for **{n_runs} selected runs**'):
122
 
123
  st.markdown('#')
124
  st.subheader(f"UID {uid_src.title()} :violet[Leaderboard]")
 
135
  )
136
 
137
 
138
+ with st.expander(f'Show UID **{uid_src}** diversity data for **{n_runs} selected runs**'):
139
 
140
  st.markdown('#')
141
  st.subheader(f"UID {uid_src.title()} :violet[Diversity]")
 
152
 
153
  msg_col1, msg_col2 = st.columns(2)
154
  completion_src = msg_col1.radio('Select one:', ['followup', 'answer'], horizontal=True, key='completion_src')
155
+ completion_info.info(f"Showing **{completion_src}** completions for **{n_runs} selected runs**")
156
+
157
  completion_ntop = msg_col2.slider('Top k:', min_value=1, max_value=50, value=DEFAULT_COMPLETION_NTOP, key='completion_ntop')
158
 
159
  completion_col = f'{completion_src}_completions'
160
  reward_col = f'{completion_src}_rewards'
161
  uid_col = f'{completion_src}_uids'
162
+ time_col = f'{completion_src}_times'
163
 
164
  completions = inspect.completions(df_long, completion_col)
165
 
 
173
  alias=True
174
  )
175
 
176
+ with st.expander(f'Show **{completion_src}** completion rewards data for **{n_runs} selected runs**'):
177
 
178
  st.markdown('#')
179
  st.subheader('Completion :violet[Rewards]')
 
191
  )
192
 
193
 
194
+ with st.expander(f'Show **{completion_src}** completion length data for **{n_runs} selected runs**'):
195
+
196
+ st.markdown('#')
197
+ st.subheader('Completion :violet[Length]')
198
+
199
+ words_checkbox = st.checkbox('Use words', value=True, key='words_checkbox')
200
+
201
+ plot.completion_length_time(
202
+ df,
203
+ completion_col=completion_col,
204
+ uid_col=uid_col,
205
+ time_col=time_col,
206
+ words=words_checkbox,
207
+ )
208
+
209
  ### Prompt-based scoring ###
210
  with tab4:
211
  # coming soon
212
  st.info('Prompt-based scoring coming soon')
213
+ st.snow()
214
 
215
  # st.dataframe(df_long_long.filter(regex=prompt_src).head())
216
 
opendashboards/assets/inspect.py CHANGED
@@ -51,4 +51,7 @@ def run_event_data(df_runs, df, selected_runs):
51
  column_config={
52
  "url": st.column_config.LinkColumn("URL"),
53
  }
54
- )
 
 
 
 
51
  column_config={
52
  "url": st.column_config.LinkColumn("URL"),
53
  }
54
+ )
55
+
56
def highlight_row(row, expr, color='lightgrey', bg_color='white'):
    """Build one CSS background declaration per cell of ``row``.

    Args:
        row: Any sized object; only its length is used.
        expr: Truthy to paint the row with ``color``, falsy for ``bg_color``.
        color: Background applied when ``expr`` is truthy.
        bg_color: Background applied when ``expr`` is falsy.

    Returns:
        A list of ``len(row)`` identical ``background-color:...`` strings.
    """
    chosen = color if expr else bg_color
    return [f'background-color:{chosen}'] * len(row)
opendashboards/assets/io.py CHANGED
@@ -5,6 +5,13 @@ import streamlit as st
5
 
6
  import opendashboards.utils.utils as utils
7
 
 
 
 
 
 
 
 
8
 
9
  @st.cache_data
10
  def load_runs(project, filters, min_steps=10):
@@ -94,3 +101,92 @@ def load_data(selected_runs, load=True, save=False):
94
  return pd.concat(frames)
95
 
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  import opendashboards.utils.utils as utils
7
 
8
+ from pandas.api.types import (
9
+ is_categorical_dtype,
10
+ is_datetime64_any_dtype,
11
+ is_numeric_dtype,
12
+ is_object_dtype,
13
+ )
14
+
15
 
16
  @st.cache_data
17
  def load_runs(project, filters, min_steps=10):
 
101
  return pd.concat(frames)
102
 
103
 
104
def filter_dataframe(df: pd.DataFrame, demo_selection=None) -> pd.DataFrame:
    """
    Adds a UI on top of a dataframe to let viewers filter columns

    In "Use demo" mode the frame is reduced to ``demo_selection`` and returned
    immediately. In "Add filters" mode the viewer picks columns and a widget is
    drawn per column (multiselect, numeric slider, date range or text/regex
    box); each widget narrows the frame further.

    Args:
        df (pd.DataFrame): Original dataframe
        demo_selection: Row selector passed to ``df.loc`` in demo mode (the
            visible caller passes a boolean mask). When ``None``, demo mode
            selects no rows instead of raising.

    Returns:
        pd.DataFrame: Filtered dataframe
    """
    # Local import: only needed to recognise malformed user-supplied patterns.
    import re

    filter_mode = st.sidebar.radio("Filter mode", ("Use demo", "Add filters"), index=0)

    run_msg = st.info("Select a single wandb run or compare multiple runs")

    if filter_mode == "Use demo":
        # `df.loc[None]` raises, so an absent demo selection means "no rows".
        df = df.iloc[0:0] if demo_selection is None else df.loc[demo_selection]
        run_msg.info(f"Selected {len(df)} runs")
        return df

    df = df.copy()

    # Try to convert datetimes into a standard format (datetime, no timezone)
    for col in df.columns:
        if is_object_dtype(df[col]):
            try:
                df[col] = pd.to_datetime(df[col])
            except Exception:
                # Deliberate best-effort: columns that do not parse stay as they are.
                pass

        if is_datetime64_any_dtype(df[col]):
            df[col] = df[col].dt.tz_localize(None)

    modification_container = st.container()

    with modification_container:
        to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
        for column in to_filter_columns:
            # The narrow left column only indents the widget.
            _indent, right = st.columns((1, 20))
            # Treat columns with < 10 unique values as categorical
            # (isinstance check replaces the deprecated is_categorical_dtype).
            if isinstance(df[column].dtype, pd.CategoricalDtype) or df[column].nunique() < 10:
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    df[column].unique(),
                    default=list(df[column].unique()),
                )
                df = df[df[column].isin(user_cat_input)]
            elif is_numeric_dtype(df[column]):
                _min = float(df[column].min())
                _max = float(df[column].max())
                step = (_max - _min) / 100
                user_num_input = right.slider(
                    f"Values for {column}",
                    min_value=_min,
                    max_value=_max,
                    value=(_min, _max),
                    step=step,
                )
                df = df[df[column].between(*user_num_input)]
            elif is_datetime64_any_dtype(df[column]):
                user_date_input = right.date_input(
                    f"Values for {column}",
                    value=(
                        df[column].min(),
                        df[column].max(),
                    ),
                )
                # A single value means the viewer is still picking the range.
                if len(user_date_input) == 2:
                    start_date, end_date = map(pd.to_datetime, user_date_input)
                    # The widget yields calendar dates (midnight). Compare against
                    # the start of the following day so rows timestamped during
                    # the end date are kept rather than silently dropped.
                    end_exclusive = end_date + pd.Timedelta(days=1)
                    in_range = (df[column] >= start_date) & (df[column] < end_exclusive)
                    df = df.loc[in_range]
            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    text_values = df[column].astype(str)
                    try:
                        matches = text_values.str.contains(user_text_input)
                    except re.error:
                        # Not a valid regex (e.g. a lone "("): treat it as a
                        # plain substring instead of crashing the app.
                        matches = text_values.str.contains(user_text_input, regex=False)
                    df = df[matches]

    # Report the outcome of the filtering in the placeholder created above.
    if len(df):
        run_msg.info(f"Selected {len(df)} runs")
    else:
        run_msg.error("Please select at least one run")

    return df
opendashboards/assets/metric.py CHANGED
@@ -18,11 +18,11 @@ def wandb(df_runs):
18
 
19
 
20
  @st.cache_data
21
- def runs(df, df_long, selected_runs):
22
 
23
  col1, col2, col3 = st.columns(3)
24
- col1.metric(label="Runs", value=len(selected_runs))
25
- col1.metric(label="Events", value=df.shape[0]) #
26
  col2.metric(label="Followup UIDs", value=df_long.followup_uids.nunique())
27
  col2.metric(label="Answer UIDs", value=df_long.answer_uids.nunique())
28
  col3.metric(label="Followup Completions", value=df_long.followup_completions.nunique())
 
18
 
19
 
20
  @st.cache_data
21
+ def runs(df_long):
22
 
23
  col1, col2, col3 = st.columns(3)
24
+ col1.metric(label="Runs", value=df_long.run_id.nunique())
25
+ col1.metric(label="Events", value=df_long.shape[0])
26
  col2.metric(label="Followup UIDs", value=df_long.followup_uids.nunique())
27
  col2.metric(label="Answer UIDs", value=df_long.answer_uids.nunique())
28
  col3.metric(label="Followup Completions", value=df_long.followup_completions.nunique())
opendashboards/assets/plot.py CHANGED
@@ -53,4 +53,16 @@ def weights(df, uids, ntop=10):
53
  ntop=ntop
54
  ),
55
  use_container_width=True
 
 
 
 
 
 
 
 
 
 
 
 
56
  )
 
53
  ntop=ntop
54
  ),
55
  use_container_width=True
56
+ )
57
+
58
def completion_length_time(df, completion_col, uid_col, time_col, words=False):
    """Draw the completion-length-versus-time figure in the Streamlit app.

    The figure is built by ``plotting.plot_completion_length_time`` from the
    given column names and rendered at full container width.

    Returns:
        Whatever ``st.plotly_chart`` returns.
    """
    figure = plotting.plot_completion_length_time(
        df,
        uid_col=uid_col,
        completion_col=completion_col,
        time_col=time_col,
        words=words,
    )
    return st.plotly_chart(figure, use_container_width=True)
opendashboards/utils/plotting.py CHANGED
@@ -97,8 +97,8 @@ def plot_uid_diversty(df: pd.DataFrame, remove_unsuccessful: bool = False) -> go
97
  merged,
98
  x="diversity_followup",
99
  y="diversity_answer",
100
- opacity=0.3,
101
- size="followup_completions_size",
102
  color="reward_mean",
103
  hover_data=["UID"] + merged.columns.tolist(),
104
  marginal_x="histogram",
@@ -219,7 +219,7 @@ def plot_completion_rewards(
219
  labels={"rank": "Rank", reward_col: "Reward", time_col: ""},
220
  title=f"Rewards for {len(completions)} Messages",
221
  **plotly_config,
222
- opacity=0.3,
223
  )
224
 
225
 
@@ -258,12 +258,13 @@ def plot_leaderboard(
258
  labels={"x": f"{agg_col.title()}", "y": group_on, "color": ""},
259
  title=f"Leaderboard for {agg_col}, top {ntop} {group_on}",
260
  color_continuous_scale="BlueRed",
261
- opacity=0.5,
262
  hover_data=[rankings.index.astype(str)],
263
  **plotly_config,
264
  )
265
 
266
 
 
267
  def plot_dendrite_rates(
268
  df: pd.DataFrame, uid_col: str = "answer_uids", reward_col: str = "answer_rewards", ntop: int = 20, uids: List[int] = None
269
  ) -> go.Figure:
@@ -297,10 +298,38 @@ def plot_dendrite_rates(
297
  barmode="group",
298
  title="Dendrite Calls by UID",
299
  color_continuous_scale="Blues",
300
- opacity=0.5,
301
  **plotly_config,
302
  )
303
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
  def plot_network_embedding(
306
  df: pd.DataFrame,
@@ -358,6 +387,6 @@ def plot_network_embedding(
358
  title=f"Graph for Top {ntop} Completion Similarities",
359
  color_continuous_scale="BlueRed",
360
  hover_data=["UID", "top_completions"],
361
- opacity=0.5,
362
  **plotly_config,
363
  )
 
97
  merged,
98
  x="diversity_followup",
99
  y="diversity_answer",
100
+ opacity=0.35,
101
+ # size="followup_completions_size",
102
  color="reward_mean",
103
  hover_data=["UID"] + merged.columns.tolist(),
104
  marginal_x="histogram",
 
219
  labels={"rank": "Rank", reward_col: "Reward", time_col: ""},
220
  title=f"Rewards for {len(completions)} Messages",
221
  **plotly_config,
222
+ opacity=0.35,
223
  )
224
 
225
 
 
258
  labels={"x": f"{agg_col.title()}", "y": group_on, "color": ""},
259
  title=f"Leaderboard for {agg_col}, top {ntop} {group_on}",
260
  color_continuous_scale="BlueRed",
261
+ opacity=0.35,
262
  hover_data=[rankings.index.astype(str)],
263
  **plotly_config,
264
  )
265
 
266
 
267
+
268
  def plot_dendrite_rates(
269
  df: pd.DataFrame, uid_col: str = "answer_uids", reward_col: str = "answer_rewards", ntop: int = 20, uids: List[int] = None
270
  ) -> go.Figure:
 
298
  barmode="group",
299
  title="Dendrite Calls by UID",
300
  color_continuous_scale="Blues",
301
+ opacity=0.35,
302
  **plotly_config,
303
  )
304
 
305
def plot_completion_length_time(
    df: pd.DataFrame,
    uid_col: str = "answer_uids",
    completion_col: str = "answer_completions",
    time_col: str = "answer_times",
    words: bool = False,
) -> go.Figure:
    """Scatter completion length against time, with marginal histograms.

    The three named columns are exploded together so each row holds a single
    uid / completion / time triple.

    Args:
        df: Frame containing the three columns below.
        uid_col: Column of UIDs, shown on hover.
        completion_col: Column of completion texts, measured and shown on hover.
        time_col: Column of times, cast to float for the y axis.
        words: Measure length in whitespace-separated words when True,
            otherwise in characters.

    Returns:
        The plotly express scatter figure.
    """
    columns = [uid_col, completion_col, time_col]
    exploded = df[columns].explode(column=columns)
    exploded["time"] = exploded[time_col].astype(float)

    text = exploded[completion_col].str
    exploded["completion_length"] = text.split().str.len() if words else text.len()

    unit = 'Words' if words else 'Characters'
    axis_labels = {"completion_length": f"Completion Length, {unit}", "time": "Time (s)"}

    return px.scatter(
        exploded,
        x='completion_length',
        y='time',
        labels=axis_labels,
        title=f"Completion Length vs Time, {unit}",
        marginal_x="histogram",
        marginal_y="histogram",
        hover_data=[uid_col, completion_col],
        opacity=0.35,
        **plotly_config,
    )
332
+
333
 
334
  def plot_network_embedding(
335
  df: pd.DataFrame,
 
387
  title=f"Graph for Top {ntop} Completion Similarities",
388
  color_continuous_scale="BlueRed",
389
  hover_data=["UID", "top_completions"],
390
+ opacity=0.35,
391
  **plotly_config,
392
  )