Sarkosos committed
Commit 2508d8e · 1 Parent(s): 328256f

updated api to work for new dashboard

Files changed (3):
  1. api.py +32 -12
  2. classes.py +21 -7
  3. utils.py +40 -22
api.py CHANGED
@@ -2,20 +2,20 @@
 import atexit
 import datetime
 
-from apscheduler.schedulers.background import BackgroundScheduler
-from fastapi import FastAPI
-import utils
 import pandas as pd
 import uvicorn
+from apscheduler.schedulers.background import BackgroundScheduler
+from fastapi import FastAPI
 
-from classes import Productivity, ProductivityData, Last24hProductivityData, Throughput
-
+import utils
+from classes import Metagraph, Productivity, Throughput
 
 # Global variables (saves time on loading data)
 state_vars = None
 reload_timestamp = datetime.datetime.now().strftime('%D %T')
 
 data_all = None
+data_30d = None
 data_24h = None
 
 app = FastAPI()
@@ -24,12 +24,14 @@ def load_data():
     """
     Reload the state variables
     """
-    global data_all, data_24h, reload_timestamp
+    global data_all, data_30d, data_24h, reload_timestamp
 
-    utils.fetch_new_runs()
+    # utils.fetch_new_runs()
 
     data_all = utils.preload_data()
 
+    data_30d = data_all[(pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('30 days'))]
+
     data_24h = data_all[(pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('1 days'))]
     reload_timestamp = datetime.datetime.now().strftime('%D %T')
 
@@ -61,13 +63,31 @@ def productivity_metrics():
     """
     Get the productivity metrics
     """
-    # Unpack the metrics using the correct keys
-    result = utils.get_productivity(df_all=data_all, df_24h=data_24h)
-    all_time = ProductivityData(**result['all_time'])
-    last_24h = Last24hProductivityData(**result['last_24h'])
-
-    return {"all_time": all_time, "last_24h": last_24h}
-
+    result = utils.get_productivity(df_all=data_all, df_24h=data_24h, df_30d=data_30d)
+
+
+    return result
+@app.get("/metagraph", response_model=Metagraph)
+def get_metagraph():
+    """
+    Get the metagraph
+    """
+
+    df_m = utils.get_metagraph()
+    df_miners = df_m.sort_values('I', ascending=False).reset_index()
+    incentives = df_miners['I'].astype(float).values
+    emissions = df_miners['E'].astype(float).values
+    identities = df_miners['identity']
+    hotkeys = df_miners['hotkey']
+    coldkeys = df_miners['coldkey']
+    trusts = df_miners['trust'].astype(float).values
+    results = {'incentives': incentives,
+               'emissions': emissions,
+               'identities': identities,
+               'hotkeys': hotkeys,
+               'coldkeys': coldkeys,
+               'trusts': trusts}
+    return results
 
 @app.get("/throughput", response_model=Throughput)
 def throughput_metrics():
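With this change, the productivity endpoint returns the nested all_time / last_24h / last_30d payload directly, and the new /metagraph endpoint exposes the miner snapshot as parallel lists. Below is a minimal sketch of how the new dashboard might query both endpoints; the base URL/port are assumptions, and the /productivity route path is inferred from the response model rather than shown in this diff:

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed host/port, not part of this commit

# Nested productivity payload: one block per time window, each carrying counts
# plus cumulative time-series data for plotting.
productivity = requests.get(f"{BASE_URL}/productivity").json()  # route path assumed
for window in ("all_time", "last_24h", "last_30d"):
    block = productivity[window]
    print(window, block["unique_folded"], block["total_completed_jobs"])

# Metagraph snapshot: parallel lists sorted by incentive ('I'), highest first.
metagraph = requests.get(f"{BASE_URL}/metagraph").json()
best = {
    "hotkey": metagraph["hotkeys"][0],
    "incentive": metagraph["incentives"][0],
    "emission": metagraph["emissions"][0],
}
print(best)
```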
classes.py CHANGED
@@ -1,17 +1,23 @@
 from pydantic import BaseModel
+from datetime import datetime
+from typing import List
+
+
+class Data(BaseModel):
+    last_event_at: List[datetime]
+    cumulative_jobs: List[int]
 
 class ProductivityData(BaseModel):
-    total_completed_jobs: dict[str, dict[int, str]]
-
-
-
-class Last24hProductivityData(BaseModel):
     unique_folded: int
     total_completed_jobs: int
+    unique_folded_data: Data
+    total_completed_jobs_data: Data
 
+
 class Productivity(BaseModel):
     all_time: ProductivityData
-    last_24h: Last24hProductivityData
+    last_24h: ProductivityData
+    last_30d: ProductivityData
 
 class ThroughputData(BaseModel):
     validator_sent: float
@@ -20,4 +26,12 @@ class ThroughputData(BaseModel):
 class Throughput(BaseModel):
     all_time: ThroughputData
     last_24h: ThroughputData
-    data: dict
+    data: dict
+
+class Metagraph(BaseModel):
+    incentives: List[float]
+    emissions: List[float]
+    identities: List[str]
+    hotkeys: List[str]
+    coldkeys: List[str]
+    trusts: List[float]
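As a quick sanity check on the reshaped schema, the new models can be instantiated directly; this is an illustrative sketch with made-up values, not code from the commit:

```python
from datetime import datetime

from classes import Data, Metagraph, Productivity, ProductivityData

window = ProductivityData(
    unique_folded=2,
    total_completed_jobs=3,
    unique_folded_data=Data(
        last_event_at=[datetime(2024, 5, 1), datetime(2024, 5, 2)],
        cumulative_jobs=[1, 2],
    ),
    total_completed_jobs_data=Data(
        last_event_at=[datetime(2024, 5, 1), datetime(2024, 5, 2), datetime(2024, 5, 3)],
        cumulative_jobs=[1, 2, 3],
    ),
)

# Productivity now carries the same ProductivityData shape for all three windows.
payload = Productivity(all_time=window, last_24h=window, last_30d=window)

# Metagraph holds one entry per miner across parallel lists.
meta = Metagraph(
    incentives=[0.9, 0.1],
    emissions=[1.2, 0.3],
    identities=["miner-a", "miner-b"],
    hotkeys=["hk-a", "hk-b"],
    coldkeys=["ck-a", "ck-b"],
    trusts=[0.8, 0.5],
)
print(payload.last_30d.total_completed_jobs, len(meta.hotkeys))
```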
utils.py CHANGED
@@ -8,7 +8,6 @@ import pandas as pd
 import streamlit as st
 import tqdm
 import wandb
-
 # TODO: Store the runs dataframe (as in sn1 dashboard) and top up with the ones created since the last snapshot
 # TODO: Store relevant wandb data in a database for faster access
 
@@ -192,40 +191,60 @@ def get_data_transferred(df, df_24h, unit='GB'):
         'data': df[['md_inputs_sum', 'md_outputs_sum', 'updated_at']].to_dict()
     }
 
+def calculate_productivity_data(df):
+    completed_jobs = df[df['updated_count'] == 10]
+    completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])
+    unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
+    completed_jobs = completed_jobs.sort_values(by='last_event_at').reset_index()
+    completed_jobs['cumulative_jobs'] = completed_jobs.index + 1
+    unique_folded = unique_folded.sort_values(by='last_event_at').reset_index()
+    unique_folded['cumulative_jobs'] = unique_folded.index + 1
+    return {
+        'unique_folded': len(unique_folded),
+        'total_completed_jobs': len(completed_jobs),
+        'unique_folded_data': {'last_event_at': unique_folded['last_event_at'].dt.to_pydatetime(), 'cumulative_jobs': unique_folded['cumulative_jobs'].values},
+        'total_completed_jobs_data': {'last_event_at': completed_jobs['last_event_at'].dt.to_pydatetime(), 'cumulative_jobs': completed_jobs['cumulative_jobs'].values}
+    }
 
-def get_productivity(df_all, df_24h):
+def get_productivity(df_all, df_24h, df_30d):
     result = {
         'all_time': {
-            'total_completed_jobs': 0
+            'unique_folded': 0,
+            'total_completed_jobs': 0,
+            'unique_folded_data': {},
+            'total_completed_jobs_data': {}
         },
         'last_24h': {
             'unique_folded': 0,
-            'total_completed_jobs': 0
+            'total_completed_jobs': 0,
+            'unique_folded_data': {},
+            'total_completed_jobs_data': {}
+        },
+        'last_30d': {
+            'unique_folded': 0,
+            'total_completed_jobs': 0,
+            'unique_folded_data': {},
+            'total_completed_jobs_data': {}
         }
     }
-    if df_all is not None:
-
-
-        completed_jobs = df_all[df_all['updated_count'] == 10]
-
-        result['all_time'].update({
-            'total_completed_jobs': completed_jobs[["updated_at", "pdb_id"]].to_dict(),
-        })
+
+
+
+    if df_all is not None:
+        result['all_time'].update(calculate_productivity_data(df_all))
 
     if df_24h is not None:
-        completed_jobs_24h = df_24h[df_24h['updated_count'] == 10]
-        unique_completed_jobs_24h = completed_jobs_24h.drop_duplicates(subset=['pdb_id'], keep='first')
-        result['last_24h'].update({
-            'unique_folded': len(unique_completed_jobs_24h),
-            'total_completed_jobs': len(completed_jobs_24h)
-        })
+        result['last_24h'].update(calculate_productivity_data(df_24h))
+
+    if df_30d is not None:
+        result['last_30d'].update(calculate_productivity_data(df_30d))
     return result
 
-def get_leaderboard(df, ntop=10, entity_choice='identity'):
+def get_leaderboard(df, entity_choice='identity'):
 
     df = df.loc[df.validator_permit==False]
     df.index = range(df.shape[0])
-    return df.groupby(entity_choice).I.sum().sort_values().reset_index().tail(ntop)
+    return df.groupby(entity_choice).I.sum().sort_values().reset_index()
 
 
 
@@ -305,8 +324,7 @@ def preload_data():
     return combined_df
 
 @st.cache_data()
-def get_metagraph(time):
-    print(f'Loading metagraph with time {time}')
+def get_metagraph():
     subtensor = bt.subtensor(network=NETWORK)
     m = subtensor.metagraph(netuid=NETUID)
     meta_cols = ['I','stake','trust','validator_trust','validator_permit','C','R','E','dividends','last_update']
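The refactor moves the per-window counting into calculate_productivity_data, which expects a frame with pdb_id, updated_count and updated_at columns. A small illustrative run on a toy frame (values invented; assumes utils' own module-level imports such as wandb, streamlit and bittensor are installed so it can be imported):

```python
import pandas as pd

from utils import calculate_productivity_data

# Toy frame: three rows with updated_count == 10 count as completed jobs,
# two of which share a pdb_id; the last row is still in progress.
df = pd.DataFrame({
    'pdb_id': ['1abc', '1abc', '2xyz', '3def'],
    'updated_count': [10, 10, 10, 5],
    'updated_at': ['2024-05-01 10:00', '2024-05-02 11:00',
                   '2024-05-03 12:00', '2024-05-03 13:00'],
})

stats = calculate_productivity_data(df)
print(stats['total_completed_jobs'])   # 3 completed jobs
print(stats['unique_folded'])          # 2 unique pdb_ids among them
print(stats['total_completed_jobs_data']['cumulative_jobs'])  # [1 2 3]
```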