File size: 5,946 Bytes
9ac2e0d
 
 
 
 
 
 
 
864cff1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ac2e0d
 
864cff1
9ac2e0d
 
 
864cff1
9ac2e0d
864cff1
 
 
 
 
 
 
9ac2e0d
 
 
 
 
864cff1
9ac2e0d
864cff1
 
9ac2e0d
 
864cff1
 
 
 
 
9ac2e0d
 
 
 
864cff1
 
 
9ac2e0d
 
 
 
 
864cff1
 
 
 
 
 
 
 
 
 
 
 
 
 
9ac2e0d
864cff1
 
 
 
 
 
9ac2e0d
864cff1
9ac2e0d
864cff1
9ac2e0d
 
864cff1
 
9ac2e0d
 
864cff1
 
9ac2e0d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
864cff1
 
 
 
 
 
 
9ac2e0d
 
864cff1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import time
import pandas as pd
import streamlit as st
import plotly.express as px

import utils

# Project roadmap / checklist kept inline as a string literal.
# It is bound to the throwaway name `_` and never read by this script.
# NOTE(review): presumably the assignment is there so Streamlit's "magic"
# does not render a bare string literal on the page — confirm.
_ = """
[x] Define KPIs: Number of steps, number of completions and total generated tokens
[x] Data pipeline I: pull run summary data from wandb
[x] Data pipeline II: pull run event data from wandb (max 500 steps per run)
[x] Task trends: Number of tasks over time
[x] Reward trends I: average reward over time, by task
[x] Reward trends II: average nonzero reward over time, by task
[x] Reward trends III: average nonzero normalized reward over time, by task
[x] Explain trends: show release dates to indicate sudden changes 
[ ] Miner trends: associate uids with miner rankings and plot top miner rewards vs network avg 
[ ] Baseline rewards I: compare the network trends with baseline model gpt-3.5-turbo
[ ] Baseline rewards II: compare the network trends with baseline model gpt-4o
[ ] Baseline rewards III: compare the network trends with baseline model zephyr
[ ] Baseline rewards IV: compare the network trends with baseline model solar
[ ] Baseline rewards V: compare the network trends with baseline model llama3 8B
[ ] Baseline rewards VI: compare the network trends with baseline model llama3 70B

---------
"""

# Page header, followed by a raw <br> used as a vertical spacer.
st.title('Prompting Subnet Dashboard')
st.markdown('<br>', unsafe_allow_html=True)

# Fetch the dashboard state from the utils module.
# NOTE(review): the original comment said the data is reloaded periodically;
# any caching/refresh logic lives inside utils.load_state_vars — confirm there.
state_vars = utils.load_state_vars()

# Pull out the tables used by the sections below, in a fixed order.
(
    df_runs,
    df_runs_24h,
    df_vali,
    df_events,
    df_task_counts,
    df_m,
) = (
    state_vars[state_key]
    for state_key in (
        'df_runs',
        'df_runs_24h',
        'df_vali',
        'df_events',
        'df_task_counts',
        'metagraph',
    )
)

# Brief pop-up confirming how many runs were loaded.
st.toast(f'Loaded {len(df_runs)} runs')

#### ------ PRODUCTIVITY ------

# Section heading and explanatory banner.
st.subheader('Productivity overview')
st.info('Productivity metrics show how much data has been created by subnet 1')

# Same summary helper applied to the full run table and to the 24h run table;
# the 24h figures are shown as the delta under each headline number.
productivity = utils.get_productivity(df_runs)
productivity_24h = utils.get_productivity(df_runs_24h)

# Four side-by-side KPI tiles.
m1, m2, m3, m4 = st.columns(4)
m1.metric(
    label='Competition duration',
    value=f'{productivity.get("duration").days} days',
)
m2.metric(
    label='Total events',
    value=f'{productivity.get("total_events")/1e6:,.2f}M',
    delta=f'{productivity_24h.get("total_events")/1e6:,.2f}M (24h)',
)
m3.metric(
    label='Total completions',
    value=f'{productivity.get("total_completions")/1e9:,.2f}B',
    delta=f'{productivity_24h.get("total_completions")/1e9:,.2f}B (24h)',
)
m4.metric(
    label='Total dataset tokens',
    value=f'{productivity.get("total_tokens")/1e9:,.2f}B',
    delta=f'{productivity_24h.get("total_tokens")/1e9:,.2f}B (24h)',
)

st.markdown('<br>', unsafe_allow_html=True)

# Area chart with one series per column of df_task_counts.
task_counts_fig = px.area(
    df_task_counts,
    y=df_task_counts.columns,
    title='Data Created by Task',
    labels={'created_at': '', 'value': 'Total data created'},
)
st.plotly_chart(task_counts_fig, use_container_width=True)

st.markdown('<br>', unsafe_allow_html=True)

#### ------ IMPROVEMENT ------

# Overview of improvement: section heading and explanatory banner
st.subheader('Improvement overview')
st.info('Subnet 1 is an endlessly improving system, where miners compete to produce high quality responses to a range of challenging tasks')


# Human-readable task labels paired with the short task identifier they map
# to. Insertion order is preserved, so this is also the order in which the
# labels are listed when the keys are enumerated.
TASK_CHOICES = dict(
    (
        ('Question answering', 'qa'),
        ('Summarization', 'summarization'),
        ('Date-based question answering', 'date_qa'),
        ('Math', 'math'),
        ('Generic instruction', 'generic'),
        ('Sentiment analysis', 'sentiment'),
        ('Translation', 'translation'),
    )
)

# Display options for the reward-trend chart: three checkboxes in the left
# column and the smoothing-window slider in the right column.
with st.expander('Advanced settings'):
    c1, c2 = st.columns(2)
    remove_zero_rewards = c1.checkbox(
        'Exclude zero rewards',
        value=True,
        help='Remove completions which scored zero rewards (failed responses, timeouts etc.)',
    )
    normalize_rewards = c1.checkbox(
        'Normalize rewards',
        value=True,
        help='Scale rewards for each task to a maximum value of 1 (approx)',
    )
    show_releases = c1.checkbox(
        'Show releases',
        value=False,
        help='Add annotations which indicate when major releases may have impacted network performance',
    )
    moving_avg_window = c2.slider(
        'Moving avg. window',
        min_value=1,
        max_value=30,
        value=14,
        help='Window size to smooth data and make long term trends clearer',
    )

# Name handed to the plot helper as `col`, chosen by the "Normalize rewards" toggle.
reward_col = 'normalized_rewards' if normalize_rewards else 'rewards'

# Reward statistics derived from the event table at daily frequency ('1D'),
# with multi-turn events excluded and zero rewards dropped when requested.
df_stats = utils.get_reward_stats(df_events, exclude_multiturn=True, freq='1D', remove_zero_rewards=remove_zero_rewards)


# Task picker: the radio shows the labels, the chart receives the mapped id.
task_choice_label = st.radio('Select task', list(TASK_CHOICES.keys()), index=0, horizontal=True)
task_choice = TASK_CHOICES[task_choice_label]

# TODO: add a fill gradient to make it easier to see the trend
st.plotly_chart(
    utils.plot_reward_trends(
        df_stats,
        task=task_choice,
        window=moving_avg_window,
        col=reward_col,
        annotate=show_releases,
        task_label=task_choice_label,
    ),
    use_container_width=True,
)

st.markdown('<br>', unsafe_allow_html=True)


#### ------ LEADERBOARD ------

# Section heading and explanatory banner.
st.subheader('Leaderboard')
st.info('The leaderboard shows the top miners by incentive.')

# Controls: how many miners to show, and which entity field labels each bar.
m1, m2 = st.columns(2)
ntop = m1.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
entity_choice = m2.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)

df_miners = utils.get_leaderboard(df_m, ntop=ntop, entity_choice=entity_choice)

# Text labels on the bars are only drawn when fewer than 20 miners are shown.
if ntop < 20:
    bar_text = entity_choice
else:
    bar_text = None

leaderboard_fig = px.bar(
    df_miners,
    x='I',
    color='I',
    hover_name=entity_choice,
    text=bar_text,
    labels={'I': 'Incentive', 'trust': 'Trust', 'stake': 'Stake', '_index': 'Rank'},
)
# Hide the colorbar and don't show the y axis.
leaderboard_fig.update_layout(coloraxis_showscale=False, yaxis_visible=False)
st.plotly_chart(leaderboard_fig, use_container_width=True)


# Unfiltered metagraph table for inspection.
with st.expander('Show raw metagraph data'):
    st.dataframe(df_m)

st.markdown('<br>', unsafe_allow_html=True)


#### ------ LOGGED RUNS ------

# Final section: only the raw run table is shown for now.
st.subheader('Logged runs')
# The run timeline chart below is currently disabled and kept for reference.
# NOTE(review): its `labels` mapping uses the key 'username' while the y axis
# uses 'user' — confirm the intended column name before re-enabling.
# st.info('The timeline shows the creation and last event time of each run.')
# st.plotly_chart(
#     px.timeline(df_runs, x_start='created_at', x_end='last_event_at', y='user', color='state',
#                 labels={'created_at':'Created at', 'last_event_at':'Last event at', 'username':''},
#                 ),
#     use_container_width=True
# )

# Unfiltered run table for inspection.
with st.expander('Show raw run data'):
    st.dataframe(df_runs)