Spaces:
Paused
Paused
declutter layout
Browse files
- app.py +8 -9
- opendashboards/assets/metric.py +12 -13
app.py
CHANGED
@@ -21,7 +21,7 @@ from opendashboards.assets import io, inspect, metric, plot
|
|
21 |
|
22 |
WANDB_PROJECT = "opentensor-dev/alpha-validators"
|
23 |
PROJECT_URL = f'https://wandb.ai/{WANDB_PROJECT}/table?workspace=default'
|
24 |
-
MAX_RECENT_RUNS =
|
25 |
DEFAULT_FILTERS = {}#{"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
|
26 |
DEFAULT_SELECTED_HOTKEYS = None
|
27 |
DEFAULT_TASK = 'qa'
|
@@ -56,13 +56,6 @@ metric.wandb(df_runs)
|
|
56 |
|
57 |
# add vertical space
|
58 |
st.markdown('#')
|
59 |
-
|
60 |
-
runid_c1, runid_c2 = st.columns([3, 1])
|
61 |
-
# make multiselect for run_ids with label on same line
|
62 |
-
run_ids = runid_c1.multiselect('Select one or more weights and biases run by id:', df_runs['run_id'], key='run_id', default=df_runs['run_id'][:3], help=f'Select one or more runs to analyze. You can find the raw data for these runs [here]({PROJECT_URL}).')
|
63 |
-
n_runs = len(run_ids)
|
64 |
-
df_runs_subset = df_runs[df_runs['run_id'].isin(run_ids)]
|
65 |
-
|
66 |
st.markdown('#')
|
67 |
|
68 |
tab1, tab2, tab3, tab4 = st.tabs(["Run Data", "UID Health", "Completions", "Prompt-based scoring"])
|
@@ -72,7 +65,13 @@ with tab1:
|
|
72 |
|
73 |
st.markdown('#')
|
74 |
st.subheader(":violet[Run] Data")
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
edited_df = st.data_editor(
|
78 |
df_runs.assign(Select=False).set_index('Select'),
|
|
|
21 |
|
22 |
WANDB_PROJECT = "opentensor-dev/alpha-validators"
|
23 |
PROJECT_URL = f'https://wandb.ai/{WANDB_PROJECT}/table?workspace=default'
|
24 |
+
MAX_RECENT_RUNS = 300
|
25 |
DEFAULT_FILTERS = {}#{"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
|
26 |
DEFAULT_SELECTED_HOTKEYS = None
|
27 |
DEFAULT_TASK = 'qa'
|
|
|
56 |
|
57 |
# add vertical space
|
58 |
st.markdown('#')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
st.markdown('#')
|
60 |
|
61 |
tab1, tab2, tab3, tab4 = st.tabs(["Run Data", "UID Health", "Completions", "Prompt-based scoring"])
|
|
|
65 |
|
66 |
st.markdown('#')
|
67 |
st.subheader(":violet[Run] Data")
|
68 |
+
|
69 |
+
# make multiselect for run_ids with label on same line
|
70 |
+
run_ids = st.multiselect('Select one or more weights and biases run by id:', df_runs['run_id'], key='run_id', default=df_runs['run_id'][:3], help=f'Select one or more runs to analyze. You can find the raw data for these runs [here]({PROJECT_URL}).')
|
71 |
+
n_runs = len(run_ids)
|
72 |
+
df_runs_subset = df_runs[df_runs['run_id'].isin(run_ids)]
|
73 |
+
|
74 |
+
with st.expander(f'Select from :violet[all] wandb runs'):
|
75 |
|
76 |
edited_df = st.data_editor(
|
77 |
df_runs.assign(Select=False).set_index('Select'),
|
opendashboards/assets/metric.py
CHANGED
@@ -28,29 +28,28 @@ def wandb(df_runs):
|
|
28 |
col2.metric('Hotkeys', fmt(df_runs.hotkey.nunique()), delta=fmt(df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique())+' (24h)')
|
29 |
col3.metric('Events', fmt(df_runs.num_steps.sum()), delta=fmt(df_runs.num_steps.sum()-df_runs_old.num_steps.sum())+' (24h)')
|
30 |
col4.metric('Completions', fmt(df_runs.num_completions.sum()), delta=fmt(df_runs.num_completions.sum()-df_runs_old.num_completions.sum())+' (24h)')
|
31 |
-
|
32 |
st.markdown('----')
|
33 |
|
34 |
|
35 |
@st.cache_data
|
36 |
-
def runs(df_long):
|
37 |
-
|
38 |
col1, col2, col3, col4 = st.columns(4)
|
39 |
-
print(df_long.columns)
|
40 |
|
41 |
# Convert to appropriate units e.g. 1.2k instead of 1200.c
|
42 |
col1.metric('Runs', fmt(df_long.run_id.nunique()))
|
43 |
col2.metric('Hotkeys', fmt(df_long.hotkey.nunique()))
|
44 |
col3.metric('Events', fmt(df_long.groupby(['run_id','_step']).ngroups))
|
45 |
col4.metric('Completions', fmt(df_long.shape[0]))
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
|
55 |
st.markdown('----')
|
56 |
|
@@ -76,7 +75,7 @@ def uids(df_long, src, uids=None):
|
|
76 |
help='Number of unique completions divided by total number of events'
|
77 |
)
|
78 |
# uniqueness can be expressed as the average number of unique completions per uid divided by all unique completions
|
79 |
-
# uniqueness is the shared completions between selected uids
|
80 |
|
81 |
col3.metric(
|
82 |
label="Uniqueness %",
|
|
|
28 |
col2.metric('Hotkeys', fmt(df_runs.hotkey.nunique()), delta=fmt(df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique())+' (24h)')
|
29 |
col3.metric('Events', fmt(df_runs.num_steps.sum()), delta=fmt(df_runs.num_steps.sum()-df_runs_old.num_steps.sum())+' (24h)')
|
30 |
col4.metric('Completions', fmt(df_runs.num_completions.sum()), delta=fmt(df_runs.num_completions.sum()-df_runs_old.num_completions.sum())+' (24h)')
|
31 |
+
|
32 |
st.markdown('----')
|
33 |
|
34 |
|
35 |
@st.cache_data
|
36 |
+
def runs(df_long, full=False):
|
37 |
+
|
38 |
col1, col2, col3, col4 = st.columns(4)
|
|
|
39 |
|
40 |
# Convert to appropriate units e.g. 1.2k instead of 1200.c
|
41 |
col1.metric('Runs', fmt(df_long.run_id.nunique()))
|
42 |
col2.metric('Hotkeys', fmt(df_long.hotkey.nunique()))
|
43 |
col3.metric('Events', fmt(df_long.groupby(['run_id','_step']).ngroups))
|
44 |
col4.metric('Completions', fmt(df_long.shape[0]))
|
45 |
+
|
46 |
+
if full:
|
47 |
+
aggs = df_long.groupby('task').agg({'uids': 'nunique', 'completions': 'nunique'})
|
48 |
+
for i,c in enumerate(st.columns(len(aggs))):
|
49 |
+
name = aggs.index[i].title()
|
50 |
+
uid_unique, comp_unique = aggs.iloc[i]
|
51 |
+
c.metric(label=f'{name} UIDs', value=uid_unique)
|
52 |
+
c.metric(label=f'{name} Completions', value=comp_unique)
|
53 |
|
54 |
st.markdown('----')
|
55 |
|
|
|
75 |
help='Number of unique completions divided by total number of events'
|
76 |
)
|
77 |
# uniqueness can be expressed as the average number of unique completions per uid divided by all unique completions
|
78 |
+
# uniqueness is the shared completions between selected uids
|
79 |
|
80 |
col3.metric(
|
81 |
label="Uniqueness %",
|