Spaces:

wnstnb
/

gamedayspx

Sleeping

App Files Files Community

wnstnb committed on Jul 19, 2023

Commit

b3c44d6

1 Parent(s): 6f9f208

add performance tab

Browse files

Files changed (1) hide show

app.py +36 -1

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ from tqdm import tqdm
 from sklearn import linear_model
 import joblib
 import os
 def walk_forward_validation(df, target_column, num_training_rows, num_periods):
@@ -383,7 +384,7 @@ if st.button('🤖 Run it'):
         new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
     st.success("✅ All done!")
-    tab1, tab2, tab3 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical"])
     seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
     # above_pct_green = res1.loc[res1['Predicted'] >= seq_proba, 'True'].mean()
@@ -455,6 +456,38 @@ if st.button('🤖 Run it'):
     # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
     df_probas = res1.groupby(pd.cut(res1['Predicted'],[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf])).agg({'True':[np.mean,len,np.sum]})
     df_probas.columns = ['PctGreen','NumObs','NumGreen']
     tab1.subheader('Preds and Probabilities')
     tab1.write(results)
     tab1.write(df_probas)
@@ -465,6 +498,8 @@ if st.button('🤖 Run it'):
     tab3.subheader('Historical Data')
     tab3.write(df_final)
 # The only variable you can play with as the other ones are historical
 # new_pred.loc[:,'CurrentGap'] = -0.01 / 100

 from sklearn import linear_model
 import joblib
 import os
+from sklearn.metrics import roc_auc_score, precision_score, recall_score
 def walk_forward_validation(df, target_column, num_training_rows, num_periods):
         new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
     st.success("✅ All done!")
+    tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical", "📊 Performance"])
     seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
     # above_pct_green = res1.loc[res1['Predicted'] >= seq_proba, 'True'].mean()
     # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
     df_probas = res1.groupby(pd.cut(res1['Predicted'],[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf])).agg({'True':[np.mean,len,np.sum]})
     df_probas.columns = ['PctGreen','NumObs','NumGreen']
+    roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
+    precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
+    recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
+    len_all = len(res1)
+    res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
+    roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
+    precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
+    recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
+    len_hi = len(res2_filtered)
+    df_performance = pd.DataFrame(
+        index=[
+            'N',
+            'ROC AUC',
+            'Precision',
+            'Recall'
+        ],
+        columns = [
+            'All',
+            'High Confidence'
+        ],
+        data = [
+            [len_all, len_hi],
+            [roc_auc_score_all, roc_auc_score_hi],
+            [precision_score_all, precision_score_hi],
+            [recall_score_all, recall_score_hi]
+        ]
+    ).round(2)
     tab1.subheader('Preds and Probabilities')
     tab1.write(results)
     tab1.write(df_probas)
     tab3.subheader('Historical Data')
     tab3.write(df_final)
+    tab4.subheader('Performance')
+    tab4.write(df_performance)  # metrics table belongs in the Performance tab (tab4), not Historical (tab3)
 # The only variable you can play with as the other ones are historical
 # new_pred.loc[:,'CurrentGap'] = -0.01 / 100