steffenc committed on
Commit
969e123
·
1 Parent(s): f7bf07d

Make it better?

Browse files
dashboard.py CHANGED
@@ -40,13 +40,13 @@ with st.spinner(text=f'Checking wandb...'):
40
 
41
 
42
  ### Wandb Runs ###
43
- with st.sidebar:
44
 
45
- st.markdown('#')
46
- st.sidebar.header(":violet[Select] Runs")
47
 
48
- df_runs_subset = io.filter_dataframe(df_runs, demo_selection=df_runs.id.isin(DEFAULT_SELECTED_RUNS))
49
- n_runs = len(df_runs_subset)
50
 
51
  metric.wandb(df_runs)
52
 
@@ -64,26 +64,28 @@ with tab1:
64
  st.subheader(":violet[Run] Data")
65
  with st.expander(f'Show :violet[raw] wandb data'):
66
 
67
- filter_selected_checkbox = st.checkbox('Filter to selected runs', value=True)
68
- df_to_show = df_runs_subset if filter_selected_checkbox else df_runs
69
 
70
- # TODO: make this editable so that runs can be selected directly from the table
71
- st.dataframe(
72
- df_to_show.assign(
73
- Selected=df_to_show.index.isin(df_runs_subset.index)
74
- ).set_index('Selected').sort_index(ascending=False),#.style.highlight_max(subset=df_runs_subset.index, color='lightgreen', axis=1),
75
  use_container_width=True,
76
  )
 
 
77
 
78
  if n_runs:
79
  df = io.load_data(df_runs_subset, load=True, save=True)
 
80
  df_long = inspect.explode_data(df)
81
  df_weights = inspect.weights(df)
82
  else:
83
  st.info(f'You must select at least one run to load data')
84
  st.stop()
85
 
86
- metric.runs(df_long)
87
 
88
  st.markdown('#')
89
  st.subheader(":violet[Event] Data")
@@ -93,10 +95,12 @@ with tab1:
93
  num_rows = raw_data_col2.slider('Number of rows:', min_value=1, max_value=100, value=10, key='num_rows')
94
  st.dataframe(df_long.head(num_rows) if use_long_checkbox else df.head(num_rows),
95
  use_container_width=True)
96
-
97
 
98
 
99
  ### UID Health ###
 
 
100
  with tab2:
101
 
102
  st.markdown('#')
@@ -106,10 +110,31 @@ with tab2:
106
  uid_src = st.radio('Select one:', ['followup', 'answer'], horizontal=True, key='uid_src')
107
 
108
  metric.uids(df_long, uid_src)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- with st.expander(f'Show UID **{uid_src}** weights data for **{n_runs} selected runs**'):
111
 
112
- uids = st.multiselect('UID:', sorted(df_long[f'{uid_src}_uids'].unique()), key='uid')
113
  st.markdown('#')
114
  st.subheader(f"UID {uid_src.title()} :violet[Weights]")
115
 
@@ -189,6 +214,7 @@ with tab3:
189
  ntop=completion_ntop,
190
  completions=completion_select,
191
  )
 
192
 
193
 
194
  with st.expander(f'Show **{completion_src}** completion length data for **{n_runs} selected runs**'):
 
40
 
41
 
42
  ### Wandb Runs ###
43
+ # with st.sidebar:
44
 
45
+ # st.markdown('#')
46
+ # st.sidebar.header(":violet[Select] Runs")
47
 
48
+ # df_runs_subset = io.filter_dataframe(df_runs, demo_selection=df_runs.id.isin(DEFAULT_SELECTED_RUNS))
49
+ # n_runs = len(df_runs_subset)
50
 
51
  metric.wandb(df_runs)
52
 
 
64
  st.subheader(":violet[Run] Data")
65
  with st.expander(f'Show :violet[raw] wandb data'):
66
 
67
+ # filter_selected_checkbox = st.checkbox('Filter to selected runs', value=True)
68
+ # df_to_show = df_runs_subset if filter_selected_checkbox else df_runs
69
 
70
+ edited_df = st.data_editor(
71
+ df_runs.assign(Select=False).set_index('Select'),
72
+ column_config={"Select": st.column_config.CheckboxColumn(required=True)},
73
+ disabled=df_runs.columns,
 
74
  use_container_width=True,
75
  )
76
+ df_runs_subset = df_runs[edited_df.index==True]
77
+ n_runs = len(df_runs_subset)
78
 
79
  if n_runs:
80
  df = io.load_data(df_runs_subset, load=True, save=True)
81
+ df = inspect.clean_data(df)
82
  df_long = inspect.explode_data(df)
83
  df_weights = inspect.weights(df)
84
  else:
85
  st.info(f'You must select at least one run to load data')
86
  st.stop()
87
 
88
+ metric.runs(df_long, n_runs)
89
 
90
  st.markdown('#')
91
  st.subheader(":violet[Event] Data")
 
95
  num_rows = raw_data_col2.slider('Number of rows:', min_value=1, max_value=100, value=10, key='num_rows')
96
  st.dataframe(df_long.head(num_rows) if use_long_checkbox else df.head(num_rows),
97
  use_container_width=True)
98
+
99
 
100
 
101
  ### UID Health ###
102
+ # TODO: Live time - time elapsed since moving_averaged_score for selected UID was 0 (lower bound so use >Time)
103
+ # TODO: Weight - Most recent weight for selected UID (Add warning if weight is 0 or most recent timestamp is not current)
104
  with tab2:
105
 
106
  st.markdown('#')
 
110
  uid_src = st.radio('Select one:', ['followup', 'answer'], horizontal=True, key='uid_src')
111
 
112
  metric.uids(df_long, uid_src)
113
+ uids = st.multiselect('UID:', sorted(df_long[f'{uid_src}_uids'].unique()), key='uid')
114
+ with st.expander(f'Show UID health data for **{n_runs} selected runs** and **{len(uids)} selected UIDs**'):
115
+ st.markdown('#')
116
+ st.subheader(f"UID {uid_src.title()} :violet[Health]")
117
+ agg_uid_checkbox = st.checkbox('Aggregate UIDs', value=True)
118
+ if agg_uid_checkbox:
119
+ metric.uids(df_long, uid_src, uids)
120
+ else:
121
+ for uid in uids:
122
+ st.caption(f'UID: {uid}')
123
+ metric.uids(df_long, uid_src, [uid])
124
+
125
+ st.subheader(f'Cumulative completion frequency')
126
+
127
+ freq_col1, freq_col2 = st.columns(2)
128
+ freq_ntop = freq_col1.slider('Number of Completions:', min_value=10, max_value=1000, value=100, key='freq_ntop')
129
+ freq_rm_empty = freq_col2.checkbox('Remove empty (failed)', value=True, key='freq_rm_empty')
130
+ freq_cumulative = freq_col2.checkbox('Cumulative', value=False, key='freq_cumulative')
131
+ freq_normalize = freq_col2.checkbox('Normalize', value=True, key='freq_normalize')
132
+
133
+ plot.uid_completion_counts(df_long, uids=uids, src=uid_src, ntop=freq_ntop, rm_empty=freq_rm_empty, cumulative=freq_cumulative, normalize=freq_normalize)
134
+
135
 
136
+ with st.expander(f'Show UID weights data for **{n_runs} selected runs** and **{len(uids)} selected UIDs**'):
137
 
 
138
  st.markdown('#')
139
  st.subheader(f"UID {uid_src.title()} :violet[Weights]")
140
 
 
214
  ntop=completion_ntop,
215
  completions=completion_select,
216
  )
217
+ # TODO: show the UIDs which have used the selected completions
218
 
219
 
220
  with st.expander(f'Show **{completion_src}** completion length data for **{n_runs} selected runs**'):
opendashboards/assets/inspect.py CHANGED
@@ -3,6 +3,9 @@ import streamlit as st
3
  import pandas as pd
4
  import opendashboards.utils.utils as utils
5
 
 
 
 
6
  @st.cache_data
7
  def explode_data(df):
8
  list_cols = utils.get_list_col_lengths(df)
@@ -28,10 +31,10 @@ def weights(df, index='_timestamp'):
28
 
29
  # rename columns
30
  scores.rename({i: f'UID-{i}' for i in range(scores.shape[1])}, axis=1, inplace=True)
31
- return scores
32
-
33
  def run_event_data(df_runs, df, selected_runs):
34
-
35
  st.markdown('#')
36
 
37
  show_col1, show_col2 = st.columns(2)
@@ -52,6 +55,6 @@ def run_event_data(df_runs, df, selected_runs):
52
  "url": st.column_config.LinkColumn("URL"),
53
  }
54
  )
55
-
56
  def highlight_row(row, expr, color='lightgrey', bg_color='white'):
57
  return [f'background-color:{color}' if expr else f'background-color:{bg_color}'] * len(row)
 
3
  import pandas as pd
4
  import opendashboards.utils.utils as utils
5
 
6
+ def clean_data(df):
7
+ return df.dropna(subset=df.filter(regex='completions|rewards').columns, how='all')
8
+
9
  @st.cache_data
10
  def explode_data(df):
11
  list_cols = utils.get_list_col_lengths(df)
 
31
 
32
  # rename columns
33
  scores.rename({i: f'UID-{i}' for i in range(scores.shape[1])}, axis=1, inplace=True)
34
+ return scores
35
+
36
  def run_event_data(df_runs, df, selected_runs):
37
+
38
  st.markdown('#')
39
 
40
  show_col1, show_col2 = st.columns(2)
 
55
  "url": st.column_config.LinkColumn("URL"),
56
  }
57
  )
58
+
59
  def highlight_row(row, expr, color='lightgrey', bg_color='white'):
60
  return [f'background-color:{color}' if expr else f'background-color:{bg_color}'] * len(row)
opendashboards/assets/io.py CHANGED
@@ -16,15 +16,25 @@ from pandas.api.types import (
16
  @st.cache_data
17
  def load_runs(project, filters, min_steps=10):
18
  runs = []
 
 
 
19
  msg = st.empty()
20
- for run in utils.get_runs(project, filters, api_key=st.secrets['WANDB_API_KEY']):
21
- step = run.summary.get('_step',0)
 
 
 
 
22
  if step < min_steps:
23
  msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
24
  continue
25
-
26
- duration = run.summary.get('_runtime')
27
- end_time = run.summary.get('_timestamp')
 
 
 
28
  # extract values for selected tags
29
  rules = {'hotkey': re.compile('^[0-9a-z]{48}$',re.IGNORECASE), 'version': re.compile('^\\d\.\\d+\.\\d+$'), 'spec_version': re.compile('\\d{4}$')}
30
  tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
@@ -34,6 +44,7 @@ def load_runs(project, filters, min_steps=10):
34
  runs.append({
35
  'state': run.state,
36
  'num_steps': step,
 
37
  'entity': run.entity,
38
  'id': run.id,
39
  'name': run.name,
@@ -42,9 +53,13 @@ def load_runs(project, filters, min_steps=10):
42
  'path': os.path.join(run.entity, run.project, run.id),
43
  'start_time': pd.to_datetime(end_time-duration, unit="s"),
44
  'end_time': pd.to_datetime(end_time, unit="s"),
45
- 'duration': pd.to_datetime(duration, unit="s"),
46
  **tags
47
  })
 
 
 
 
48
  msg.empty()
49
  return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})
50
 
 
16
  @st.cache_data
17
  def load_runs(project, filters, min_steps=10):
18
  runs = []
19
+ n_events = 0
20
+ successful = 0
21
+ progress = st.progress(0, 'Fetching runs from wandb')
22
  msg = st.empty()
23
+
24
+ all_runs = utils.get_runs(project, filters, api_key=st.secrets['WANDB_API_KEY'])
25
+ for i, run in enumerate(all_runs):
26
+
27
+ summary = run.summary
28
+ step = summary.get('_step',0)
29
  if step < min_steps:
30
  msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
31
  continue
32
+
33
+ prog_msg = f'Loading data {i/len(all_runs)*100:.0f}% ({successful}/{len(all_runs)} runs, {n_events} events)'
34
+ progress.progress(i/len(all_runs),f'{prog_msg}... **fetching** `{run.name}`')
35
+
36
+ duration = summary.get('_runtime')
37
+ end_time = summary.get('_timestamp')
38
  # extract values for selected tags
39
  rules = {'hotkey': re.compile('^[0-9a-z]{48}$',re.IGNORECASE), 'version': re.compile('^\\d\.\\d+\.\\d+$'), 'spec_version': re.compile('\\d{4}$')}
40
  tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
 
44
  runs.append({
45
  'state': run.state,
46
  'num_steps': step,
47
+ 'num_completions': step*sum(len(v) for k, v in run.summary.items() if k.endswith('completions') and isinstance(v, list)),
48
  'entity': run.entity,
49
  'id': run.id,
50
  'name': run.name,
 
53
  'path': os.path.join(run.entity, run.project, run.id),
54
  'start_time': pd.to_datetime(end_time-duration, unit="s"),
55
  'end_time': pd.to_datetime(end_time, unit="s"),
56
+ 'duration': pd.to_timedelta(duration, unit="s").round('s'),
57
  **tags
58
  })
59
+ n_events += step
60
+ successful += 1
61
+
62
+ progress.empty()
63
  msg.empty()
64
  return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})
65
 
opendashboards/assets/metric.py CHANGED
@@ -1,7 +1,20 @@
1
  import time
 
2
  import pandas as pd
3
  import streamlit as st
4
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  @st.cache_data
7
  def wandb(df_runs):
@@ -9,50 +22,63 @@ def wandb(df_runs):
9
  # get rows where start time is older than 24h ago
10
  df_runs_old = df_runs.loc[df_runs.start_time < pd.to_datetime(time.time()-24*60*60, unit='s')]
11
 
12
- col1, col2, col3 = st.columns(3)
13
 
14
- col1.metric('Runs', df_runs.shape[0], delta=f'{df_runs.shape[0]-df_runs_old.shape[0]} (24h)')
15
- col2.metric('Hotkeys', df_runs.hotkey.nunique(), delta=f'{df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique()} (24h)')
16
- col3.metric('Events', df_runs.num_steps.sum(), delta=f'{df_runs.num_steps.sum()-df_runs_old.num_steps.sum()} (24h)')
 
 
 
17
  st.markdown('----')
18
 
19
 
20
  @st.cache_data
21
- def runs(df_long):
22
 
23
  col1, col2, col3 = st.columns(3)
24
- col1.metric(label="Runs", value=df_long.id.nunique())
25
- col1.metric(label="Events", value=df_long.shape[0])
26
  col2.metric(label="Followup UIDs", value=df_long.followup_uids.nunique())
27
  col2.metric(label="Answer UIDs", value=df_long.answer_uids.nunique())
28
- col3.metric(label="Followup Completions", value=df_long.followup_completions.nunique())
29
- col3.metric(label="Answer Completions", value=df_long.answer_completions.nunique())
30
  st.markdown('----')
31
 
32
 
33
-
34
  @st.cache_data
35
- def uids(df_long, src, uid=None):
36
 
37
  uid_col = f'{src}_uids'
38
  completion_col = f'{src}_completions'
39
  nsfw_col = f'{src}_nsfw_scores'
40
  reward_col = f'{src}_rewards'
41
 
42
- if uid is not None:
43
- df_long = df_long.loc[df_long[uid_col] == uid]
44
 
45
- col1, col2, col3 = st.columns(3)
46
  col1.metric(
47
  label="Success %",
48
- value=f'{df_long.loc[df_long[completion_col].str.len() > 0].shape[0]/df_long.shape[0] * 100:.1f}'
 
49
  )
50
  col2.metric(
51
  label="Diversity %",
52
- value=f'{df_long[completion_col].nunique()/df_long.shape[0] * 100:.1f}'
 
53
  )
 
 
54
  col3.metric(
 
 
 
 
 
55
  label="Toxicity %",
56
- value=f'{df_long[nsfw_col].mean() * 100:.1f}' if nsfw_col in df_long.columns else 'N/A'
 
57
  )
58
  st.markdown('----')
 
1
  import time
2
+ import numerize
3
  import pandas as pd
4
  import streamlit as st
5
 
6
+ def fmt(number):
7
+ units = ['', 'k', 'M', 'B']
8
+ magnitude = 0
9
+ while abs(number) >= 1000 and magnitude < len(units) - 1:
10
+ magnitude += 1
11
+ number /= 1000
12
+
13
+ if units[magnitude]:
14
+ return f'{number:.2f}{units[magnitude]}'
15
+ else:
16
+ return f'{number:.0f}{units[magnitude]}'
17
+
18
 
19
  @st.cache_data
20
  def wandb(df_runs):
 
22
  # get rows where start time is older than 24h ago
23
  df_runs_old = df_runs.loc[df_runs.start_time < pd.to_datetime(time.time()-24*60*60, unit='s')]
24
 
25
+ col1, col2, col3, col4 = st.columns(4)
26
 
27
+ # Convert to appropriate units e.g. 1.2k instead of 1200.
28
+ col1.metric('Runs', fmt(df_runs.shape[0]), delta=fmt(df_runs.shape[0]-df_runs_old.shape[0])+' (24h)')
29
+ col2.metric('Hotkeys', fmt(df_runs.hotkey.nunique()), delta=fmt(df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique())+' (24h)')
30
+ col3.metric('Events', fmt(df_runs.num_steps.sum()), delta=fmt(df_runs.num_steps.sum()-df_runs_old.num_steps.sum())+' (24h)')
31
+ col4.metric('Completions', fmt(df_runs.num_completions.sum()), delta=fmt(df_runs.num_completions.sum()-df_runs_old.num_completions.sum())+' (24h)')
32
+
33
  st.markdown('----')
34
 
35
 
36
  @st.cache_data
37
+ def runs(df_long, n_runs):
38
 
39
  col1, col2, col3 = st.columns(3)
40
+ col1.metric(label="Runs", value=n_runs)
41
+ col1.metric(label="Events", value=df_long.shape[0])
42
  col2.metric(label="Followup UIDs", value=df_long.followup_uids.nunique())
43
  col2.metric(label="Answer UIDs", value=df_long.answer_uids.nunique())
44
+ col3.metric(label="Unique Followups", value=df_long.followup_completions.nunique())
45
+ col3.metric(label="Unique Answers", value=df_long.answer_completions.nunique())
46
  st.markdown('----')
47
 
48
 
49
+
50
  @st.cache_data
51
+ def uids(df_long, src, uids=None):
52
 
53
  uid_col = f'{src}_uids'
54
  completion_col = f'{src}_completions'
55
  nsfw_col = f'{src}_nsfw_scores'
56
  reward_col = f'{src}_rewards'
57
 
58
+ if uids:
59
+ df_long = df_long.loc[df_long[uid_col].isin(uids)]
60
 
61
+ col1, col2, col3, col4 = st.columns(4)
62
  col1.metric(
63
  label="Success %",
64
+ value=f'{df_long.loc[df_long[completion_col].str.len() > 0].shape[0]/df_long.shape[0] * 100:.1f}',
65
+ help='Number of successful completions divided by total number of events'
66
  )
67
  col2.metric(
68
  label="Diversity %",
69
+ value=f'{df_long[completion_col].nunique()/df_long.shape[0] * 100:.1f}',
70
+ help='Number of unique completions divided by total number of events'
71
  )
72
+ # uniqueness can be expressed as the average number of unique completions per uid divided by all unique completions
73
+
74
  col3.metric(
75
+ label="Uniqueness %",
76
+ value=f'{df_long.groupby(uid_col)[completion_col].nunique().mean()/df_long[completion_col].nunique() * 100:.1f}',
77
+ help='Average number of unique completions per uid divided by all unique completions'
78
+ )
79
+ col4.metric(
80
  label="Toxicity %",
81
+ value=f'{df_long[nsfw_col].mean() * 100:.1f}' if nsfw_col in df_long.columns else '--',
82
+ help='Average toxicity score of all events'
83
  )
84
  st.markdown('----')
opendashboards/assets/plot.py CHANGED
@@ -65,4 +65,18 @@ def completion_length_time(df, completion_col, uid_col, time_col, words=False):
65
  words=words
66
  ),
67
  use_container_width=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  )
 
65
  words=words
66
  ),
67
  use_container_width=True
68
+ )
69
+
70
+ def uid_completion_counts(df, uids, src, rm_empty, ntop=100, cumulative=False, normalize=True):
71
+ return st.plotly_chart(
72
+ plotting.plot_uid_completion_counts(
73
+ df,
74
+ uids=uids,
75
+ src=src,
76
+ rm_empty=rm_empty,
77
+ ntop=ntop,
78
+ cumulative=cumulative,
79
+ normalize=normalize
80
+ ),
81
+ use_container_width=True
82
  )
opendashboards/utils/plotting.py CHANGED
@@ -249,7 +249,6 @@ def plot_leaderboard(
249
  else:
250
  index = rankings.index.astype(str)
251
 
252
- print(f"Using top {ntop} {group_on} by {agg_col}: \n{rankings}")
253
  return px.bar(
254
  x=rankings,
255
  y=index,
@@ -307,16 +306,16 @@ def plot_completion_length_time(
307
  uid_col: str = "answer_uids",
308
  completion_col: str = "answer_completions",
309
  time_col: str = "answer_times",
310
- words: bool = False,
311
  ) -> go.Figure:
312
-
313
  df = df[[uid_col, completion_col, time_col]].explode(column=[uid_col, completion_col, time_col])
314
  df["time"] = df[time_col].astype(float)
315
  if words:
316
  df["completion_length"] = df[completion_col].str.split().str.len()
317
  else:
318
  df["completion_length"] = df[completion_col].str.len()
319
-
320
  return px.scatter(
321
  df,
322
  x='completion_length',
@@ -329,7 +328,44 @@ def plot_completion_length_time(
329
  opacity=0.35,
330
  **plotly_config,
331
  )
332
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
  def plot_network_embedding(
335
  df: pd.DataFrame,
 
249
  else:
250
  index = rankings.index.astype(str)
251
 
 
252
  return px.bar(
253
  x=rankings,
254
  y=index,
 
306
  uid_col: str = "answer_uids",
307
  completion_col: str = "answer_completions",
308
  time_col: str = "answer_times",
309
+ words: bool = False,
310
  ) -> go.Figure:
311
+
312
  df = df[[uid_col, completion_col, time_col]].explode(column=[uid_col, completion_col, time_col])
313
  df["time"] = df[time_col].astype(float)
314
  if words:
315
  df["completion_length"] = df[completion_col].str.split().str.len()
316
  else:
317
  df["completion_length"] = df[completion_col].str.len()
318
+
319
  return px.scatter(
320
  df,
321
  x='completion_length',
 
328
  opacity=0.35,
329
  **plotly_config,
330
  )
331
+
332
+ def plot_uid_completion_counts(
333
+ df: pd.DataFrame,
334
+ uids: List[int],
335
+ src: str = 'answer',
336
+ rm_empty: bool = True,
337
+ ntop: int = 100,
338
+ cumulative: bool = False,
339
+ normalize: bool = True,
340
+ ) -> go.Figure:
341
+
342
+ completion_col = f'{src}_completions'
343
+ uid_col = f'{src}_uids'
344
+ if rm_empty:
345
+ df = df.loc[df[completion_col].str.len()>0]
346
+
347
+ df = df.loc[df[uid_col].isin(uids)]
348
+
349
+ g = df.groupby(uid_col)[completion_col].value_counts(normalize=normalize).reset_index(level=1)
350
+ y_col = g.columns[-1]
351
+
352
+ # rescale each group to have a max of 1 if normalize is True
353
+ if cumulative:
354
+ g[y_col] = g.groupby(level=0)[y_col].cumsum().transform(lambda x: x/x.max() if normalize else x)
355
+
356
+ # get top n completions
357
+ g = g.groupby(level=0).head(ntop)
358
+
359
+ # # create a rank column which increments by one and resets when the uid changes
360
+ g['rank'] = g.groupby(level=0).cumcount()+1
361
+
362
+ return px.line(g.sort_index().reset_index(),
363
+ x='rank',y=y_col,color=uid_col,
364
+ labels={'rank':'Top Completions',uid_col:'UID',y_col:y_col.replace('_',' ').title()},
365
+ title=f'{src.title()} Completion {y_col.replace("_"," ").title()}s by Rank',
366
+ **plotly_config,
367
+ ).update_traces(opacity=0.7)
368
+
369
 
370
  def plot_network_embedding(
371
  df: pd.DataFrame,