Sarkosos committed · 14285d3
Parent(s): 5144f34
added plots for total jobs done and unique proteins folded
api.py
CHANGED
@@ -8,7 +8,7 @@ import utils
 import pandas as pd
 import uvicorn
 
-from classes import Productivity, ProductivityData, Throughput
+from classes import Productivity, ProductivityData, Last24hProductivityData, Throughput
 
 
 # Global variables (saves time on loading data)
@@ -64,9 +64,9 @@ def productivity_metrics():
     # Unpack the metrics using the correct keys
     result = utils.get_productivity(df_all=data_all, df_24h=data_24h)
     all_time = ProductivityData(**result['all_time'])
-    last_24h =
+    last_24h = Last24hProductivityData(**result['last_24h'])
 
-    return
+    return {"all_time": all_time, "last_24h": last_24h}
 
 
 @app.get("/throughput", response_model=Throughput)
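For orientation, a sketch of what a client now gets back from this endpoint. The base URL is hypothetical and the values are made up; the nested all_time payload is a DataFrame dumped with .to_dict(), so it maps column name to {row index: value}.

```python
import requests

BASE_URL = 'http://localhost:8000'  # hypothetical; the dashboard uses its own

resp = requests.get(f'{BASE_URL}/productivity').json()
# Expected shape after this commit (illustrative values):
# {
#   'all_time': {'total_completed_jobs': {'updated_at': {...}, 'pdb_id': {...}}},
#   'last_24h': {'unique_folded': 12, 'total_completed_jobs': 15}
# }
# Note: JSON serialization turns the integer row keys of the inner dicts
# into strings; pd.DataFrame(...) in app.py accepts either form.
completed_jobs = resp['all_time']['total_completed_jobs']
productivity_24h = resp['last_24h']
```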
app.py
CHANGED
@@ -31,39 +31,56 @@ st.subheader('Productivity overview')
 st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
 
 productivity_all = requests.get(f'{BASE_URL}/productivity').json()
-
+completed_jobs = productivity_all['all_time']['total_completed_jobs']
+
 productivity_24h = productivity_all['last_24h']
+completed_jobs = pd.DataFrame(completed_jobs)
+completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])
 
-
-
-# st.write(productivity_24h)
+unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
+unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
 
 m1, m2 = st.columns(2)
-
-m2.metric('Total jobs completed', f'{productivity.get("total_completed_jobs", 0):,.0f}', delta=f'{productivity_24h.get("total_completed_jobs", 0):,.0f} (24h)')
+m1.metric('Unique proteins folded', f'{len(unique_folded):,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
+m2.metric('Total jobs completed', f'{len(completed_jobs):,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
 
-# m3.metric('Total simulation steps', f'{productivity.get("total_md_steps"):,.0f}', delta=f'{productivity_24h.get("total_md_steps"):,.0f} (24h)')
+st.markdown('<br>', unsafe_allow_html=True)
 
-# st.markdown('<br>', unsafe_allow_html=True)
 
-#
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+# time_binned_data_complete = completed_jobs.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
+# time_binned_data_unique = unique_folded.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
+
+PROD_CHOICES = {
+    'Unique proteins folded': 'unique_pdbs',
+    'Total simulations': 'total_pdbs',
+}
+
+
+
+prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
+prod_choice = PROD_CHOICES[prod_choice_label]
+
+PROD_DATA = {
+    'unique_pdbs': unique_folded,
+    'total_pdbs': completed_jobs,
+}
+df = PROD_DATA[prod_choice]
+
+df = df.sort_values(by='last_event_at').reset_index()
+
+# Create a cumulative count column
+df['cumulative_jobs'] = df.index + 1
+
+# Plot the cumulative jobs over time
+st.plotly_chart(
+    # add fillgradient to make it easier to see the trend
+    px.line(df, x='last_event_at', y='cumulative_jobs',
+            title='Total Jobs Completed Over Time',
+            labels={'last_event_at': 'Time', 'cumulative_jobs': 'Total Jobs Completed'}).update_traces(fill='tozeroy'),
+    use_container_width=True,
+)
 
 st.markdown('<br>', unsafe_allow_html=True)
 
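The plotting logic added here is a small pandas idiom: drop_duplicates(subset=['pdb_id'], keep='first') keeps one row per protein, and after sorting by timestamp and resetting the index, index + 1 gives the running total. A self-contained sketch of the same transformation, with fabricated rows (column names mirror the app; the values are hypothetical):

```python
import pandas as pd
import plotly.express as px

# Hypothetical completed-jobs records in the shape the API returns.
completed_jobs = pd.DataFrame({
    'pdb_id':     ['1abc', '2xyz', '1abc', '3def'],
    'updated_at': ['2024-05-01 10:00', '2024-05-01 18:00',
                   '2024-05-02 09:00', '2024-05-02 21:00'],
})
completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])

# One row per protein: the first fold of each pdb_id counts as "unique".
unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')

# Sort chronologically; after reset_index the positional index runs 0..n-1,
# so index + 1 is the cumulative job count at each timestamp.
df = completed_jobs.sort_values(by='last_event_at').reset_index(drop=True)
df['cumulative_jobs'] = df.index + 1

fig = px.line(df, x='last_event_at', y='cumulative_jobs',
              title='Total Jobs Completed Over Time')
fig.update_traces(fill='tozeroy')  # shade under the curve, as in the app
```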
classes.py
CHANGED
@@ -1,12 +1,17 @@
 from pydantic import BaseModel
 
 class ProductivityData(BaseModel):
+    total_completed_jobs: dict[str, dict[int, str]]
+
+
+
+class Last24hProductivityData(BaseModel):
     unique_folded: int
     total_completed_jobs: int
 
 class Productivity(BaseModel):
     all_time: ProductivityData
-    last_24h:
+    last_24h: Last24hProductivityData
 
 class ThroughputData(BaseModel):
     validator_sent: float
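The all_time payload is now a DataFrame dumped via .to_dict() (column name → {row position → value}), which is what the dict[str, dict[int, str]] annotation mirrors. A quick validation sketch against the new models, with a hypothetical payload:

```python
from classes import Productivity

# Hypothetical payload matching what utils.get_productivity now returns:
# the all_time block is DataFrame.to_dict() output, i.e.
# {column name -> {row index -> value}}.
payload = {
    'all_time': {
        'total_completed_jobs': {
            'updated_at': {0: '2024-05-01 12:00:00', 1: '2024-05-02 08:30:00'},
            'pdb_id':     {0: '1abc', 1: '2xyz'},
        }
    },
    'last_24h': {'unique_folded': 12, 'total_completed_jobs': 15},
}

validated = Productivity(**payload)  # raises ValidationError on shape mismatch
```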
utils.py
CHANGED
@@ -164,7 +164,6 @@ def get_data_transferred(df, unit='GB'):
 def get_productivity(df_all, df_24h):
     result = {
         'all_time': {
-            'unique_folded': 0,
             'total_completed_jobs': 0
         },
         'last_24h': {
@@ -173,19 +172,16 @@ def get_productivity(df_all, df_24h):
         }
     }
     if df_all is not None:
-
-
-
-        total_historical_run_updates = df_all.active.isna().sum()
-        total_historical_completed_jobs = total_historical_run_updates//10 # this is an estimate based on minimum number of updates per pdb
+
+
+        completed_jobs = df_all[df_all['updated_count'] == 10]
 
         result['all_time'].update({
-            '
-            'total_completed_jobs': (completed_jobs_all + total_historical_completed_jobs).item(),
+            'total_completed_jobs': completed_jobs[["updated_at", "pdb_id"]].to_dict(),
         })
 
     if df_24h is not None:
-        completed_jobs_24h = df_24h[df_24h['updated_count']
+        completed_jobs_24h = df_24h[df_24h['updated_count'] == 10]
         unique_completed_jobs_24h = completed_jobs_24h.drop_duplicates(subset=['pdb_id'], keep='first')
         result['last_24h'].update({
             'unique_folded': len(unique_completed_jobs_24h),