steffenc committed on
Commit
9ac2e0d
·
1 Parent(s): a9ac6b7

Copy files from folding dashboard

Browse files
Files changed (2) hide show
  1. api.py +147 -0
  2. app.py +140 -0
api.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import atexit
3
+ import datetime
4
+
5
+ from flask import Flask, request, jsonify
6
+ from apscheduler.schedulers.background import BackgroundScheduler
7
+
8
+ import utils
9
+
10
app = Flask(__name__)


# Module-level cache shared by every route, so requests do not have to
# reload the data themselves.
state_vars = None  # replaced by load_data() with whatever utils.load_state_vars() returns

# Formatted time of the most recent (re)load; starts out as the import time.
reload_timestamp = datetime.datetime.now().strftime('%D %T')
16
+
17
+
18
def load_data():
    """
    Refresh the module-level cache.

    Rebinds the global `state_vars` to a fresh result of
    `utils.load_state_vars()` and stamps `reload_timestamp` with the
    current local time, then logs the reload to stdout.
    """
    global state_vars, reload_timestamp

    state_vars = utils.load_state_vars()

    finished_at = datetime.datetime.now()
    reload_timestamp = finished_at.strftime('%D %T')

    print(f'Reloaded data at {reload_timestamp}')
28
+
29
+
30
def start_scheduler():
    """
    Start a background scheduler that calls `load_data` every 30 minutes.

    The scheduler's shutdown is registered with `atexit` so it is stopped
    when the interpreter exits.
    """
    reload_every_seconds = 60*30

    background = BackgroundScheduler()
    background.add_job(func=load_data, trigger="interval", seconds=reload_every_seconds)
    background.start()

    # Shut down the scheduler when exiting the app
    atexit.register(background.shutdown)
37
+
38
+
39
@app.route('/', methods=['GET'])
def home():
    """Serve the welcome message at the API root."""
    greeting = "Welcome to the Bittensor Protein Folding Leaderboard API!"
    return greeting
42
+
43
+
44
@app.route('/updated', methods=['GET'])
def updated():
    """Return the timestamp string recorded at the most recent data (re)load."""
    last_reload = reload_timestamp
    return last_reload
47
+
48
+
49
@app.route('/data', methods=['GET'])
@app.route('/data/<period>', methods=['GET'])
def data(period=None):
    """
    Get the raw data behind the metrics
    Args:
    - period: '24h' to read state_vars["dataframe_24h"], or None (the
      default, used by the bare /data route) to read state_vars["dataframe"]
    Returns:
    - data: List of dicts (from pandas DataFrame), every value cast to str
    - HTTP 400 with a JSON `error` message when `period` is anything else
    """
    # Validate explicitly instead of with `assert`: assertions are stripped
    # under `python -O`, and a failed assert would surface as a 500 rather
    # than a client error.
    if period not in ('24h', None):
        return jsonify(error=f"Invalid period: {period}. Must be '24h' or None."), 400

    df = state_vars["dataframe_24h"] if period == '24h' else state_vars["dataframe"]
    return jsonify(
        df.astype(str).to_dict(orient='records')
    )
60
+
61
@app.route('/productivity', methods=['GET'])
@app.route('/productivity/<period>', methods=['GET'])
def productivity_metrics(period=None):
    """
    Get the productivity metrics
    Args:
    - period: '24h' to read state_vars["dataframe_24h"], or None (the
      default, used by the bare /productivity route) to read
      state_vars["dataframe"]
    Returns:
    - productivity: JSON encoding of utils.get_productivity(df)
    - HTTP 400 with a JSON `error` message when `period` is anything else
    """
    # Validate explicitly instead of with `assert`: assertions are stripped
    # under `python -O`, and a failed assert would surface as a 500 rather
    # than a client error.
    if period not in ('24h', None):
        return jsonify(error=f"Invalid period: {period}. Must be '24h' or None."), 400

    df = state_vars["dataframe_24h"] if period == '24h' else state_vars["dataframe"]
    return jsonify(
        utils.get_productivity(df)
    )
73
+
74
+
75
@app.route('/throughput', methods=['GET'])
@app.route('/throughput/<period>', methods=['GET'])
def throughput_metrics(period=None):
    """
    Get the throughput metrics
    Args:
    - period: '24h' to read state_vars["dataframe_24h"], or None (the
      default, used by the bare /throughput route) to read
      state_vars["dataframe"]
    Returns:
    - throughput: JSON encoding of utils.get_data_transferred(df)
    - HTTP 400 with a JSON `error` message when `period` is anything else
    """
    # Validate explicitly instead of with `assert`: assertions are stripped
    # under `python -O`, and a failed assert would surface as a 500 rather
    # than a client error.
    if period not in ('24h', None):
        return jsonify(error=f"Invalid period: {period}. Must be '24h' or None."), 400

    df = state_vars["dataframe_24h"] if period == '24h' else state_vars["dataframe"]
    return jsonify(utils.get_data_transferred(df))
84
+
85
+
86
@app.route('/metagraph', methods=['GET'])
def metagraph():
    """
    Serve the cached metagraph table
    Returns:
    - metagraph_data: List of dicts (from pandas DataFrame)
    """
    records = state_vars["metagraph"].to_dict(orient='records')
    return jsonify(records)
99
+
100
@app.route('/leaderboard', methods=['GET'])
@app.route('/leaderboard/<entity>', methods=['GET'])
@app.route('/leaderboard/<entity>/<ntop>', methods=['GET'])
def leaderboard(entity='identity', ntop=10):
    """
    Get the leaderboard data
    Args:
    - entity: one of utils.ENTITY_CHOICES (defaults to 'identity')
    - ntop: number of entries requested; arrives as a string when taken
      from the URL and is converted to int here (defaults to 10)
    Returns:
    - leaderboard_data: List of dicts (from pandas DataFrame)
    - HTTP 400 with a JSON `error` message when `entity` is not a valid
      choice or `ntop` is not an integer
    """
    # Validate explicitly instead of with `assert`: assertions are stripped
    # under `python -O`, and a failed assert would surface as a 500 rather
    # than a client error.
    if entity not in utils.ENTITY_CHOICES:
        return jsonify(error=f"Invalid entity choice: {entity}"), 400

    # <ntop> uses the default (string) URL converter, so any text can arrive
    # here; report a bad value to the client instead of crashing the view.
    try:
        ntop = int(ntop)
    except ValueError:
        return jsonify(error=f"Invalid ntop: {ntop}. Must be an integer."), 400

    df_miners = utils.get_leaderboard(
        state_vars["metagraph"],
        ntop=ntop,
        entity_choice=entity
    )

    return jsonify(
        df_miners.to_dict(orient='records')
    )
121
+
122
@app.route('/validator', methods=['GET'])
def validator():
    """
    Serve the metagraph rows whose validator_trust is greater than zero
    Returns:
    - validator_data: List of dicts (from pandas DataFrame)
    """
    metagraph_df = state_vars["metagraph"]
    has_validator_trust = metagraph_df['validator_trust'] > 0
    records = metagraph_df.loc[has_validator_trust].to_dict(orient='records')
    return jsonify(records)
135
+
136
+
137
if __name__ == '__main__':
    import os

    # Flask's debug mode enables the Werkzeug interactive debugger, which lets
    # anyone who can reach the server execute arbitrary Python. Since the app
    # binds to every interface (0.0.0.0), debug mode is now opt-in: set
    # FLASK_DEBUG=1 (or "true") for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true')

    # Populate the cache once before serving, then keep it fresh in the background.
    load_data()
    start_scheduler()

    app.run(host='0.0.0.0', port=5001, debug=debug_enabled)
143
+
144
+
145
+ # to test locally
146
+ # curl -X GET http://0.0.0.0:5001/data
147
+
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ import streamlit as st
4
+ import plotly.express as px
5
+
6
+ import utils
7
+
8
# Wishlist of metrics for the dashboard. Bound to `_` so the string is not a
# bare expression statement (presumably to keep Streamlit from rendering it —
# TODO confirm).
_ = """
Proteins folded (delta 24hr)
Current proteins folding (24hr)
Average time to fold trend
Refolded proteins (group by run id and pdb id and get unique)
Simulation duration distribution
"""

# Seconds per refresh bucket; the bucket index is what gets passed to the utils loaders.
UPDATE_INTERVAL = 3600


st.title('Folding Subnet Dashboard')
st.markdown('<br>', unsafe_allow_html=True)

# reload data periodically: the argument only changes once per UPDATE_INTERVAL
# (presumably utils.build_data caches on it — TODO confirm)
refresh_bucket = time.time() // UPDATE_INTERVAL
df = utils.build_data(refresh_bucket)
st.toast(f'Loaded {len(df)} runs')

# TODO: fix the factor for 24 hours ago
# Keep the runs whose last event happened within the past day.
one_day_ago = pd.Timestamp.now() - pd.Timedelta('1d')
seen_in_last_24h = df['last_event_at'] > one_day_ago
df_24h = df.loc[seen_in_last_24h]
# correction factor to account for the fact that the data straddles the 24h boundary
# correction factor is based on the fraction of the run which occurred in the last 24h
# factor = (df_24h.last_event_at - pd.Timestamp.now() + pd.Timedelta('1d')) / pd.Timedelta('1d')
32
+
33
+
34
#### ------ PRODUCTIVITY ------

# Headline numbers: all-time value with the last-24h value shown as the delta
st.subheader('Productivity overview')
st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')

productivity = utils.get_productivity(df)
productivity_24h = utils.get_productivity(df_24h)

# (card title, key into the productivity mappings), in left-to-right order
overview_cards = (
    ('Unique proteins folded', 'unique_folded'),
    ('Total proteins folded', 'total_simulations'),
    ('Total simulation steps', 'total_md_steps'),
)
for card, (card_title, metric_key) in zip(st.columns(3), overview_cards):
    card.metric(
        card_title,
        f'{productivity.get(metric_key):,.0f}',
        delta=f'{productivity_24h.get(metric_key):,.0f} (24h)',
    )

st.markdown('<br>', unsafe_allow_html=True)

# Runs grouped into 12-hour bins by their last event time (also used by the throughput chart)
time_binned_data = df.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))

# Radio label -> dataframe column that is summed per bin
PROD_CHOICES = {
    'Unique proteins folded': 'unique_pdbs',
    'Total simulations': 'total_pdbs',
    'Total simulation steps': 'total_md_steps',
}
prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES), index=0, horizontal=True)
prod_choice = PROD_CHOICES[prod_choice_label]

# Per-bin sums accumulated into a running total over time
running_total = time_binned_data[prod_choice].sum().cumsum()

productivity_fig = px.area(
    running_total,
    y=prod_choice,
    labels={'last_event_at':'', prod_choice: prod_choice_label},
)
# fill the area down to zero to make it easier to see the trend
productivity_fig = productivity_fig.update_traces(fill='tozeroy')
st.plotly_chart(productivity_fig, use_container_width=True)

st.markdown('<br>', unsafe_allow_html=True)
70
+
71
+
72
#### ------ THROUGHPUT ------
st.subheader('Throughput overview')

st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')

# Fixed display unit; a selector was sketched but is disabled:
# st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
MEM_UNIT = 'GB'

data_transferred = utils.get_data_transferred(df, unit=MEM_UNIT)
data_transferred_24h = utils.get_data_transferred(df_24h, unit=MEM_UNIT)

# (card title, key into the data_transferred mappings), in left-to-right order
throughput_cards = (
    (f'Total sent data ({MEM_UNIT})', 'sent'),
    (f'Total received data ({MEM_UNIT})', 'received'),
    (f'Total transferred data ({MEM_UNIT})', 'total'),
)
for card, (card_title, direction_key) in zip(st.columns(3), throughput_cards):
    card.metric(
        card_title,
        f'{data_transferred.get(direction_key):,.0f}',
        delta=f'{data_transferred_24h.get(direction_key):,.0f} (24h)',
    )


# Dataframe column -> legend label
IO_CHOICES = {'total_data_sent':'Sent', 'total_data_received':'Received'}

# Per-bin sums -> friendly column names -> running total -> long format
# (one row per bin and direction, keeping the time index)
io_per_bin = time_binned_data[list(IO_CHOICES)].sum()
io_cumulative = io_per_bin.rename(columns=IO_CHOICES).cumsum()
io_running_total = io_cumulative.melt(ignore_index=False)
io_running_total['value'] = io_running_total['value'].apply(utils.convert_unit, args=(utils.BASE_UNITS, MEM_UNIT))

throughput_fig = px.area(
    io_running_total,
    y='value',
    color='variable',
    labels={'last_event_at':'', 'value': f'Data transferred ({MEM_UNIT})', 'variable':'Direction'},
)
st.plotly_chart(throughput_fig, use_container_width=True)

st.markdown('<br>', unsafe_allow_html=True)
100
+
101
+
102
#### ------ LEADERBOARD ------

st.subheader('Leaderboard')
st.info('The leaderboard shows the top miners by incentive.')

# Controls side by side: how many miners to show, and which entity to label them by
slider_col, radio_col = st.columns(2)
ntop = slider_col.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
entity_choice = radio_col.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)

# The argument only changes once per UPDATE_INTERVAL
metagraph_bucket = time.time() // UPDATE_INTERVAL
df_m = utils.get_metagraph(metagraph_bucket)
df_miners = utils.get_leaderboard(df_m, ntop=ntop, entity_choice=entity_choice)

# Bar text is only shown for fewer than 20 bars
bar_text = entity_choice if ntop < 20 else None
leaderboard_fig = px.bar(
    df_miners,
    x='I',
    color='I',
    hover_name=entity_choice,
    text=bar_text,
    labels={'I':'Incentive', 'trust':'Trust', 'stake':'Stake', '_index':'Rank'},
)
# hide colorbar and don't show y axis
leaderboard_fig = leaderboard_fig.update_layout(coloraxis_showscale=False, yaxis_visible=False)
st.plotly_chart(leaderboard_fig, use_container_width=True)


with st.expander('Show raw metagraph data'):
    st.dataframe(df_m)

st.markdown('<br>', unsafe_allow_html=True)
126
+
127
+
128
#### ------ LOGGED RUNS ------

st.subheader('Logged runs')
st.info('The timeline shows the creation and last event time of each run.')

# One bar per run from created_at to last_event_at, one row per username, coloured by state
runs_fig = px.timeline(
    df,
    x_start='created_at',
    x_end='last_event_at',
    y='username',
    color='state',
    labels={'created_at':'Created at', 'last_event_at':'Last event at', 'username':''},
)
st.plotly_chart(runs_fig, use_container_width=True)

with st.expander('Show raw run data'):
    st.dataframe(df)