Spaces:
Sleeping
Sleeping
add performance tab
Browse files
app.py
CHANGED
@@ -12,6 +12,7 @@ from tqdm import tqdm
|
|
12 |
from sklearn import linear_model
|
13 |
import joblib
|
14 |
import os
|
|
|
15 |
|
16 |
def walk_forward_validation(df, target_column, num_training_rows, num_periods):
|
17 |
|
@@ -383,7 +384,7 @@ if st.button('๐ค Run it'):
|
|
383 |
new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
|
384 |
|
385 |
st.success("✅ All done!")
|
386 |
-
tab1, tab2, tab3 = st.tabs(["🔮 Prediction", "✨ New Data", "๐ Historical"])
|
387 |
|
388 |
seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
|
389 |
# above_pct_green = res1.loc[res1['Predicted'] >= seq_proba, 'True'].mean()
|
@@ -455,6 +456,38 @@ if st.button('๐ค Run it'):
|
|
455 |
# df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
|
456 |
df_probas = res1.groupby(pd.cut(res1['Predicted'],[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf])).agg({'True':[np.mean,len,np.sum]})
|
457 |
df_probas.columns = ['PctGreen','NumObs','NumGreen']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
458 |
tab1.subheader('Preds and Probabilities')
|
459 |
tab1.write(results)
|
460 |
tab1.write(df_probas)
|
@@ -465,6 +498,8 @@ if st.button('๐ค Run it'):
|
|
465 |
tab3.subheader('Historical Data')
|
466 |
tab3.write(df_final)
|
467 |
|
|
|
|
|
468 |
|
469 |
# The only variable you can play with as the other ones are historical
|
470 |
# new_pred.loc[:,'CurrentGap'] = -0.01 / 100
|
|
|
12 |
from sklearn import linear_model
|
13 |
import joblib
|
14 |
import os
|
15 |
+
from sklearn.metrics import roc_auc_score, precision_score, recall_score
|
16 |
|
17 |
def walk_forward_validation(df, target_column, num_training_rows, num_periods):
|
18 |
|
|
|
384 |
new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
|
385 |
|
386 |
st.success("✅ All done!")
|
387 |
+
tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "๐ Historical", "๐ Performance"])
|
388 |
|
389 |
seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
|
390 |
# above_pct_green = res1.loc[res1['Predicted'] >= seq_proba, 'True'].mean()
|
|
|
456 |
# df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
|
457 |
df_probas = res1.groupby(pd.cut(res1['Predicted'],[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf])).agg({'True':[np.mean,len,np.sum]})
|
458 |
df_probas.columns = ['PctGreen','NumObs','NumGreen']
|
459 |
+
|
460 |
+
roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
|
461 |
+
precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
|
462 |
+
recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
|
463 |
+
len_all = len(res1)
|
464 |
+
|
465 |
+
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
|
466 |
+
|
467 |
+
roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
|
468 |
+
precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
|
469 |
+
recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
|
470 |
+
len_hi = len(res2_filtered)
|
471 |
+
|
472 |
+
df_performance = pd.DataFrame(
|
473 |
+
index=[
|
474 |
+
'N',
|
475 |
+
'ROC AUC',
|
476 |
+
'Precision',
|
477 |
+
'Recall'
|
478 |
+
],
|
479 |
+
columns = [
|
480 |
+
'All',
|
481 |
+
'High Confidence'
|
482 |
+
],
|
483 |
+
data = [
|
484 |
+
[len_all, len_hi],
|
485 |
+
[roc_auc_score_all, roc_auc_score_hi],
|
486 |
+
[precision_score_all, precision_score_hi],
|
487 |
+
[recall_score_all, recall_score_hi]
|
488 |
+
]
|
489 |
+
).round(2)
|
490 |
+
|
491 |
tab1.subheader('Preds and Probabilities')
|
492 |
tab1.write(results)
|
493 |
tab1.write(df_probas)
|
|
|
498 |
tab3.subheader('Historical Data')
|
499 |
tab3.write(df_final)
|
500 |
|
501 |
+
tab4.subheader('Performance')
|
502 |
+
tab4.write(df_performance)  # render the metrics table in the Performance tab (tab4), not in Historical (tab3)
|
503 |
|
504 |
# The only variable you can play with as the other ones are historical
|
505 |
# new_pred.loc[:,'CurrentGap'] = -0.01 / 100
|