Spaces:

wnstnb
/

gamedayspx

Sleeping

App Files Files Community

wnstnb committed on Oct 19, 2023

Commit

cf3cfd9

1 Parent(s): 0e5b201

add some tables

Browse files

Files changed (1) hide show

app.py +41 -5

app.py CHANGED Viewed

@@ -109,7 +109,7 @@ with st.form("choose_model"):
             with st.spinner("Training models..."):
                 def train_models():
-                    res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
                     return res1, xgbr, seq2
                 res1, xgbr, seq2 = train_models()
             # st.success("✅ Models trained")
@@ -236,26 +236,37 @@ with st.form("choose_model"):
         data['ClosePct'] = data['ClosePct'].shift(-1)
         res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
         # df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
-        df_probas = res1.groupby(pd.cut(res1['Predicted'], _q)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
-        df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf']
         df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
         green_proba = seq_proba[0]
         red_proba = 1 - green_proba
         do_not_play = (seq_proba[0] > lo_thres) and (seq_proba[0] <= hi_thres)
         stdev = 0.01
         score = None
         num_obs = None
         cond = None
         historical_proba = None
         text_cond = None
         operator = None
         intv = None
         for q in df_probas.index:
             if q.left <= green_proba <= q.right:
                 historical_proba = df_probas.loc[q, 'PctGreen']
                 num_obs = df_probas.loc[q, 'NumObs']
                 intv = f'({q.left:.03f}, {q.right:.03f}])'
         qs = [f'({q.left:.2f}, {q.right:.2f}]' for q in df_probas.index]
@@ -265,6 +276,7 @@ with st.form("choose_model"):
             text_cond = '🟨'
             operator = ''
             score = seq_proba[0]
             cond = (res1['Predicted'] > lo_thres) & (res1['Predicted'] <= hi_thres)
             # num_obs = len(res1.loc[cond])
             # historical_proba = res1.loc[cond, 'True'].mean()
@@ -275,6 +287,7 @@ with st.form("choose_model"):
             text_cond = '🟩'
             operator = '>='
             score = green_proba
             # How many with this score?
             cond = (res1['Predicted'] >= green_proba)
             # num_obs = len(res1.loc[cond])
@@ -287,6 +300,7 @@ with st.form("choose_model"):
             text_cond = '🟥'
             operator = '<='
             score = red_proba
             # How many with this score?
             cond = (res1['Predicted'] <= seq_proba[0])
             # num_obs = len(res1.loc[cond])
@@ -295,6 +309,7 @@ with st.form("choose_model"):
             # print(cond)
         score_fmt = f'{score:.1%}'
         prev_close = data.loc[final_row,'Close']
         curr_close = data['Close'].iloc[-1]
@@ -309,7 +324,8 @@ with st.form("choose_model"):
             index=['Results'],
             data = {
                 'Confidence':[f'{text_cond} {score:.1%}'],
-                'Calib. Proba':[f'{historical_proba:.1%}'],
                 f'{intv}':[f'{num_obs}'],
                 'Prev / Curr':[f'{prev_close:.2f} / {curr_close:.2f}']
             })
@@ -324,6 +340,21 @@ with st.form("choose_model"):
         prices.columns = ['']
         roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
         roc_auc_score_calib = roc_auc_score(res1.dropna(subset='CalibPredicted')['True'].astype(int), res1.dropna(subset='CalibPredicted')['CalibPredicted'].values)
         precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
@@ -424,7 +455,12 @@ with st.form("choose_model"):
         # Cache all DFs
         all_dfs = []
-        st.dataframe(top_of_fold.set_index('Confidence',drop=True), use_container_width=True)
         tab1, tab2, tab3, tab4 = st.tabs(["🤖 Stats", "✨ New Data", "📚 Historical", "📊 Performance"])

             with st.spinner("Training models..."):
                 def train_models():
                     # Thin wrapper: runs walk_forward_validation_seq (defined outside this excerpt) on
                     # df_final with NaN rows dropped, using 'Target_clf' and 'Target' as the column
                     # names passed in, and returns its three results unchanged.
                     # NOTE(review): the meaning of the trailing positional args (200, 1) is not visible
                     # here — presumably window / step sizes; confirm against the helper's signature.
+                    res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 200, 1)
                     return res1, xgbr, seq2
                 res1, xgbr, seq2 = train_models()
             # st.success("✅ Models trained")
         # Shift ClosePct up by one row so every index carries the following row's value,
         # then attach that column to res1 by index.
         data['ClosePct'] = data['ClosePct'].shift(-1)
         res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
         # df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
         # Bucket rows by predicted probability (bin edges `_q` come from outside this excerpt) and
         # summarise each bucket: mean / count / sum of 'True', plus the median and the 25th / 75th
         # percentile of ClosePct.
+        df_probas = res1.groupby(pd.cut(res1['Predicted'], _q)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.median, lambda x: np.quantile(x, 0.25), lambda x: np.quantile(x, 0.75)]})
         # NOTE(review): 'AvgPerf' is fed by np.median above, so despite its name it holds a median.
+        df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf','25P','75P']
         # The three performance columns become percent *strings* (two decimals) from here on;
         # code further down reads them back with float(value.strip('%')) / 100.
         df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
+        df_probas['25P'] = df_probas['25P'].apply(lambda x: f'{x:.2%}')
+        df_probas['75P'] = df_probas['75P'].apply(lambda x: f'{x:.2%}')
         # First element of seq_proba is used as the "green" probability; red is its complement.
         green_proba = seq_proba[0]
         red_proba = 1 - green_proba
         # True when the probability falls in the half-open band (lo_thres, hi_thres].
         do_not_play = (seq_proba[0] > lo_thres) and (seq_proba[0] <= hi_thres)
         # NOTE(review): stdev is not referenced anywhere in the visible code.
         stdev = 0.01
         # Placeholders: the bucket lookup and the colour branches that follow overwrite these.
         # Any that are not overwritten stay None.
         score = None
+        calib_score = None
         num_obs = None
         cond = None
         historical_proba = None
+        red_hist_proba = None
+        mid = None
+        lo = None
+        hi = None
         text_cond = None
         operator = None
         intv = None
         # Locate the probability bucket that contains green_proba and pull that bucket's
         # historical statistics out of df_probas.
         # NOTE(review): both ends are tested inclusively, whereas the labels built below render
         # the bins as (left, right]. A value exactly on a shared edge matches two buckets, and
         # because there is no `break` the later bucket silently overwrites the earlier one.
         # NOTE(review): if no bucket matches, every variable assigned here keeps its None default
         # and the later f'{...:.1%}' / .strip('%') uses would raise — confirm that cannot happen.
         for q in df_probas.index:
             if q.left <= green_proba <= q.right:
                 historical_proba = df_probas.loc[q, 'PctGreen']
+                red_hist_proba = 1 - historical_proba
                 num_obs = df_probas.loc[q, 'NumObs']
                 # mid / lo / hi are the percent strings from 'AvgPerf' / '25P' / '75P', not floats.
+                mid = df_probas.loc[q, 'AvgPerf']
+                lo = df_probas.loc[q, '25P']
+                hi = df_probas.loc[q, '75P']
                 # NOTE(review): the ')' after ']' produces text like '(0.400, 0.600])' — looks like
                 # a typo. Left untouched here because the string is used as a column name later.
                 intv = f'({q.left:.03f}, {q.right:.03f}])'
         # Display labels for every bucket, two decimals each.
         qs = [f'({q.left:.2f}, {q.right:.2f}]' for q in df_probas.index]
             # NOTE(review): this excerpt is stitched from several diff hunks; the `if` / `elif`
             # lines that guard the three branch bodies below are not part of it.
             # Yellow branch — presumably the do_not_play case (the mask uses the same band);
             # reports the raw probability and the matched bucket's historical green rate.
             text_cond = '🟨'
             operator = ''
             score = seq_proba[0]
+            calib_score = historical_proba
             cond = (res1['Predicted'] > lo_thres) & (res1['Predicted'] <= hi_thres)
             # num_obs = len(res1.loc[cond])
             # historical_proba = res1.loc[cond, 'True'].mean()
             # Green branch: score is green_proba, calibrated score is the bucket's green rate.
             text_cond = '🟩'
             operator = '>='
             score = green_proba
+            calib_score = historical_proba
             # How many with this score?
             cond = (res1['Predicted'] >= green_proba)
             # num_obs = len(res1.loc[cond])
             # Red branch: score is the complement probability, calibrated score is
             # 1 - PctGreen of the matched bucket.
             text_cond = '🟥'
             operator = '<='
             score = red_proba
+            calib_score = red_hist_proba
             # How many with this score?
             cond = (res1['Predicted'] <= seq_proba[0])
             # num_obs = len(res1.loc[cond])
             # print(cond)
         # Percent-format the chosen scores (one decimal).
         # NOTE(review): calib_score is None when the bucket lookup found no match, and
         # formatting None with ':.1%' raises TypeError.
         score_fmt = f'{score:.1%}'
+        calib_score_fmt = f'{calib_score:.1%}'
         # Close at final_row (defined outside this excerpt) versus the last close in `data`.
         prev_close = data.loc[final_row,'Close']
         curr_close = data['Close'].iloc[-1]
             index=['Results'],
             data = {
                 'Confidence':[f'{text_cond} {score:.1%}'],
+                # 'Calib. Proba':[f'{historical_proba:.1%}'],
+                'Calib. Proba':[f'{text_cond} {calib_score_fmt}'],
                 f'{intv}':[f'{num_obs}'],
                 'Prev / Curr':[f'{prev_close:.2f} / {curr_close:.2f}']
             })
         prices.columns = ['']
+        targets = pd.DataFrame(
             # One-column table ('Targets') of price levels: the current close, followed by
             # prev_close scaled by the matched bucket's 25th-percentile, median and
             # 75th-percentile ClosePct. Row labels embed the percentage each level is based on.
             # lo / mid / hi are percent strings (produced with ':.2%'), hence the
             # strip('%') / 100 conversion back to a fraction; the round-trip keeps only the two
             # decimals that were printed.
             # NOTE(review): lo / mid / hi are still None if no bucket matched, in which case
             # .strip() raises AttributeError — confirm a match is guaranteed.
+            index=[
+                f'Curr ({(curr_close / prev_close) - 1:.2%})',
+                f'Low ({lo})',
+                f'Mid ({mid})',
+                f'High ({hi})'
+                ],
+            data=[
+                [f"{curr_close:.0f}"],
+                [f"{(1+float(lo.strip('%'))/100) * prev_close:.0f}"],
+                [f"{(1+float(mid.strip('%'))/100) * prev_close:.0f}"],
+                [f"{(1+float(hi.strip('%'))/100) * prev_close   :.0f}"]
+                ],
+            columns=['Targets'])
         # ROC AUC of the raw 'Predicted' values against 'True' over all rows of res1.
         roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
         # Same metric for 'CalibPredicted', restricted to rows where that column is not NaN.
         roc_auc_score_calib = roc_auc_score(res1.dropna(subset='CalibPredicted')['True'].astype(int), res1.dropna(subset='CalibPredicted')['CalibPredicted'].values)
         # Precision when 'Predicted' is turned into a class label with a 0.5 cutoff.
         precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
         # Cache all DFs
         all_dfs = []
         # Two Streamlit columns at the top: the transposed top_of_fold summary in the first,
         # the price-target table in the second.
+        top1, top2 = st.columns(2)
+        # st.dataframe(top_of_fold.set_index('Confidence',drop=True), use_container_width=True)
+        with top1:
+            st.dataframe(top_of_fold.T, use_container_width=True)
+        with top2:
+            st.dataframe(targets, use_container_width=True)
         # Four tabs for the remaining sections of the page.
         tab1, tab2, tab3, tab4 = st.tabs(["🤖 Stats", "✨ New Data", "📚 Historical", "📊 Performance"])