Spaces:
Sleeping
Sleeping
add some tables
Browse files
app.py
CHANGED
@@ -109,7 +109,7 @@ with st.form("choose_model"):
|
|
109 |
|
110 |
with st.spinner("Training models..."):
|
111 |
def train_models():
|
112 |
-
res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target',
|
113 |
return res1, xgbr, seq2
|
114 |
res1, xgbr, seq2 = train_models()
|
115 |
# st.success("✅ Models trained")
|
@@ -236,26 +236,37 @@ with st.form("choose_model"):
|
|
236 |
data['ClosePct'] = data['ClosePct'].shift(-1)
|
237 |
res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
|
238 |
# df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
|
239 |
-
df_probas = res1.groupby(pd.cut(res1['Predicted'], _q)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.
|
240 |
|
241 |
-
df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf']
|
242 |
df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
|
|
|
|
|
243 |
|
244 |
green_proba = seq_proba[0]
|
245 |
red_proba = 1 - green_proba
|
246 |
do_not_play = (seq_proba[0] > lo_thres) and (seq_proba[0] <= hi_thres)
|
247 |
stdev = 0.01
|
248 |
score = None
|
|
|
249 |
num_obs = None
|
250 |
cond = None
|
251 |
historical_proba = None
|
|
|
|
|
|
|
|
|
252 |
text_cond = None
|
253 |
operator = None
|
254 |
intv = None
|
255 |
for q in df_probas.index:
|
256 |
if q.left <= green_proba <= q.right:
|
257 |
historical_proba = df_probas.loc[q, 'PctGreen']
|
|
|
258 |
num_obs = df_probas.loc[q, 'NumObs']
|
|
|
|
|
|
|
259 |
intv = f'({q.left:.03f}, {q.right:.03f}])'
|
260 |
|
261 |
qs = [f'({q.left:.2f}, {q.right:.2f}]' for q in df_probas.index]
|
@@ -265,6 +276,7 @@ with st.form("choose_model"):
|
|
265 |
text_cond = '🟨'
|
266 |
operator = ''
|
267 |
score = seq_proba[0]
|
|
|
268 |
cond = (res1['Predicted'] > lo_thres) & (res1['Predicted'] <= hi_thres)
|
269 |
# num_obs = len(res1.loc[cond])
|
270 |
# historical_proba = res1.loc[cond, 'True'].mean()
|
@@ -275,6 +287,7 @@ with st.form("choose_model"):
|
|
275 |
text_cond = '🟩'
|
276 |
operator = '>='
|
277 |
score = green_proba
|
|
|
278 |
# How many with this score?
|
279 |
cond = (res1['Predicted'] >= green_proba)
|
280 |
# num_obs = len(res1.loc[cond])
|
@@ -287,6 +300,7 @@ with st.form("choose_model"):
|
|
287 |
text_cond = '🟥'
|
288 |
operator = '<='
|
289 |
score = red_proba
|
|
|
290 |
# How many with this score?
|
291 |
cond = (res1['Predicted'] <= seq_proba[0])
|
292 |
# num_obs = len(res1.loc[cond])
|
@@ -295,6 +309,7 @@ with st.form("choose_model"):
|
|
295 |
# print(cond)
|
296 |
|
297 |
score_fmt = f'{score:.1%}'
|
|
|
298 |
|
299 |
prev_close = data.loc[final_row,'Close']
|
300 |
curr_close = data['Close'].iloc[-1]
|
@@ -309,7 +324,8 @@ with st.form("choose_model"):
|
|
309 |
index=['Results'],
|
310 |
data = {
|
311 |
'Confidence':[f'{text_cond} {score:.1%}'],
|
312 |
-
'Calib. Proba':[f'{historical_proba:.1%}'],
|
|
|
313 |
f'{intv}':[f'{num_obs}'],
|
314 |
'Prev / Curr':[f'{prev_close:.2f} / {curr_close:.2f}']
|
315 |
})
|
@@ -324,6 +340,21 @@ with st.form("choose_model"):
|
|
324 |
|
325 |
prices.columns = ['']
|
326 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
|
328 |
roc_auc_score_calib = roc_auc_score(res1.dropna(subset='CalibPredicted')['True'].astype(int), res1.dropna(subset='CalibPredicted')['CalibPredicted'].values)
|
329 |
precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
|
@@ -424,7 +455,12 @@ with st.form("choose_model"):
|
|
424 |
# Cache all DFs
|
425 |
all_dfs = []
|
426 |
|
427 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
428 |
|
429 |
tab1, tab2, tab3, tab4 = st.tabs(["π€ Stats", "✨ New Data", "π Historical", "π Performance"])
|
430 |
|
|
|
109 |
|
110 |
with st.spinner("Training models..."):
|
111 |
def train_models():
|
112 |
+
res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 200, 1)
|
113 |
return res1, xgbr, seq2
|
114 |
res1, xgbr, seq2 = train_models()
|
115 |
# st.success("✅ Models trained")
|
|
|
236 |
data['ClosePct'] = data['ClosePct'].shift(-1)
|
237 |
res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
|
238 |
# df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
|
239 |
+
df_probas = res1.groupby(pd.cut(res1['Predicted'], _q)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.median, lambda x: np.quantile(x, 0.25), lambda x: np.quantile(x, 0.75)]})
|
240 |
|
241 |
+
df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf','25P','75P']
|
242 |
df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
|
243 |
+
df_probas['25P'] = df_probas['25P'].apply(lambda x: f'{x:.2%}')
|
244 |
+
df_probas['75P'] = df_probas['75P'].apply(lambda x: f'{x:.2%}')
|
245 |
|
246 |
green_proba = seq_proba[0]
|
247 |
red_proba = 1 - green_proba
|
248 |
do_not_play = (seq_proba[0] > lo_thres) and (seq_proba[0] <= hi_thres)
|
249 |
stdev = 0.01
|
250 |
score = None
|
251 |
+
calib_score = None
|
252 |
num_obs = None
|
253 |
cond = None
|
254 |
historical_proba = None
|
255 |
+
red_hist_proba = None
|
256 |
+
mid = None
|
257 |
+
lo = None
|
258 |
+
hi = None
|
259 |
text_cond = None
|
260 |
operator = None
|
261 |
intv = None
|
262 |
for q in df_probas.index:
|
263 |
if q.left <= green_proba <= q.right:
|
264 |
historical_proba = df_probas.loc[q, 'PctGreen']
|
265 |
+
red_hist_proba = 1 - historical_proba
|
266 |
num_obs = df_probas.loc[q, 'NumObs']
|
267 |
+
mid = df_probas.loc[q, 'AvgPerf']
|
268 |
+
lo = df_probas.loc[q, '25P']
|
269 |
+
hi = df_probas.loc[q, '75P']
|
270 |
intv = f'({q.left:.03f}, {q.right:.03f}])'
|
271 |
|
272 |
qs = [f'({q.left:.2f}, {q.right:.2f}]' for q in df_probas.index]
|
|
|
276 |
text_cond = '🟨'
|
277 |
operator = ''
|
278 |
score = seq_proba[0]
|
279 |
+
calib_score = historical_proba
|
280 |
cond = (res1['Predicted'] > lo_thres) & (res1['Predicted'] <= hi_thres)
|
281 |
# num_obs = len(res1.loc[cond])
|
282 |
# historical_proba = res1.loc[cond, 'True'].mean()
|
|
|
287 |
text_cond = '🟩'
|
288 |
operator = '>='
|
289 |
score = green_proba
|
290 |
+
calib_score = historical_proba
|
291 |
# How many with this score?
|
292 |
cond = (res1['Predicted'] >= green_proba)
|
293 |
# num_obs = len(res1.loc[cond])
|
|
|
300 |
text_cond = '🟥'
|
301 |
operator = '<='
|
302 |
score = red_proba
|
303 |
+
calib_score = red_hist_proba
|
304 |
# How many with this score?
|
305 |
cond = (res1['Predicted'] <= seq_proba[0])
|
306 |
# num_obs = len(res1.loc[cond])
|
|
|
309 |
# print(cond)
|
310 |
|
311 |
score_fmt = f'{score:.1%}'
|
312 |
+
calib_score_fmt = f'{calib_score:.1%}'
|
313 |
|
314 |
prev_close = data.loc[final_row,'Close']
|
315 |
curr_close = data['Close'].iloc[-1]
|
|
|
324 |
index=['Results'],
|
325 |
data = {
|
326 |
'Confidence':[f'{text_cond} {score:.1%}'],
|
327 |
+
# 'Calib. Proba':[f'{historical_proba:.1%}'],
|
328 |
+
'Calib. Proba':[f'{text_cond} {calib_score_fmt}'],
|
329 |
f'{intv}':[f'{num_obs}'],
|
330 |
'Prev / Curr':[f'{prev_close:.2f} / {curr_close:.2f}']
|
331 |
})
|
|
|
340 |
|
341 |
prices.columns = ['']
|
342 |
|
343 |
+
targets = pd.DataFrame(
|
344 |
+
index=[
|
345 |
+
f'Curr ({(curr_close / prev_close) - 1:.2%})',
|
346 |
+
f'Low ({lo})',
|
347 |
+
f'Mid ({mid})',
|
348 |
+
f'High ({hi})'
|
349 |
+
],
|
350 |
+
data=[
|
351 |
+
[f"{curr_close:.0f}"],
|
352 |
+
[f"{(1+float(lo.strip('%'))/100) * prev_close:.0f}"],
|
353 |
+
[f"{(1+float(mid.strip('%'))/100) * prev_close:.0f}"],
|
354 |
+
[f"{(1+float(hi.strip('%'))/100) * prev_close :.0f}"]
|
355 |
+
],
|
356 |
+
columns=['Targets'])
|
357 |
+
|
358 |
roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
|
359 |
roc_auc_score_calib = roc_auc_score(res1.dropna(subset='CalibPredicted')['True'].astype(int), res1.dropna(subset='CalibPredicted')['CalibPredicted'].values)
|
360 |
precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
|
|
|
455 |
# Cache all DFs
|
456 |
all_dfs = []
|
457 |
|
458 |
+
top1, top2 = st.columns(2)
|
459 |
+
# st.dataframe(top_of_fold.set_index('Confidence',drop=True), use_container_width=True)
|
460 |
+
with top1:
|
461 |
+
st.dataframe(top_of_fold.T, use_container_width=True)
|
462 |
+
with top2:
|
463 |
+
st.dataframe(targets, use_container_width=True)
|
464 |
|
465 |
tab1, tab2, tab3, tab4 = st.tabs(["π€ Stats", "✨ New Data", "π Historical", "π Performance"])
|
466 |
|