"""Streamlit front end for the $SPX "Gameday" models.

The page offers three model variants (at the open, after 30 minutes, after
60 minutes). Each variant lives in its own module that is expected to expose
``get_data``, ``walk_forward_validation_seq`` and ``seq_predict_proba``; this
script only orchestrates them and renders the results.

NOTE(review): the emoji in the UI strings below look mis-encoded (mojibake).
They are reproduced unchanged so that the selectbox labels and the dispatch
table stay in sync; confirm the file encoding before "repairing" them.
"""
import importlib
from typing import List, NamedTuple, Tuple

import numpy as np
import pandas as pd
import streamlit as st
from pandas.tseries.offsets import BDay
from sklearn.metrics import precision_score, recall_score, roc_auc_score


# --- Feature layout -------------------------------------------------------
# Column order is kept exactly as in the original per-model lists because the
# row is handed to the trained model as-is.
_COMMON_HEAD = (
    'BigNewsDay', 'Quarter', 'Perf5Day', 'Perf5Day_n1', 'DaysGreen', 'DaysRed',
)
_INTRADAY_ONLY = (
    'CurrentHigh30toClose', 'CurrentLow30toClose', 'CurrentClose30toClose',
    'CurrentRange30', 'GapFill30',
)
_COMMON_TAIL = (
    'CurrentGap', 'RangePct', 'RangePct_n1', 'RangePct_n2',
    'OHLC4_VIX', 'OHLC4_VIX_n1', 'OHLC4_VIX_n2',
)
_DAY_FEATURES = _COMMON_HEAD + _COMMON_TAIL
_INTRADAY_FEATURES = _COMMON_HEAD + _INTRADAY_ONLY + _COMMON_TAIL

# Every feature is cast to float except these.
_INT_FEATURES = frozenset({'Quarter'})
_BOOL_FEATURES = frozenset({'Perf5Day', 'Perf5Day_n1'})

# Predictions in (LOW, HIGH] are treated as "do not play" / neutral.
_NEUTRAL_LOW = 0.4
_NEUTRAL_HIGH = 0.6

_BADGE_NEUTRAL = '๐ŸŸจ'
_BADGE_GREEN = '๐ŸŸฉ'
_BADGE_RED = '๐ŸŸฅ'
_BADGE_RIGHT = 'โœ…'
_BADGE_WRONG = 'โŒ'

_BUCKET_EDGES = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf]
_BUCKET_LABELS = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']


class ModelSpec(NamedTuple):
    """Everything that differs between the three model variants."""

    module: str                 # importable module providing the model API
    features: Tuple[str, ...]   # feature columns, in model input order
    as_of: str                  # time label shown in the prediction header
    newest_first: bool          # show the daily performance table newest-first


# Keyed by the exact selectbox label, so the label is written only once.
# NOTE(review): only the 30-minute variant sorted its daily table newest-first
# in the original; that difference is preserved here, but it looks accidental.
MODELS = {
    '๐ŸŒž At Open': ModelSpec('model_day', _DAY_FEATURES, '6:30AM PST', False),
    'โŒš 30 Mins': ModelSpec('model_30m', _INTRADAY_FEATURES, '7AM PST', True),
    'โณ 60 Mins': ModelSpec('model_1h', _INTRADAY_FEATURES, '7:30AM PST', False),
}


def _feature_dtype(column: str) -> type:
    """Return the dtype a feature column is cast to before prediction."""
    if column in _INT_FEATURES:
        return int
    if column in _BOOL_FEATURES:
        return bool
    return float


def _latest_features(data: pd.DataFrame, final_row, features: Tuple[str, ...]) -> pd.DataFrame:
    """Build the one-row, correctly typed feature frame for ``final_row``.

    Selecting a single row yields an object-dtype Series, so every column is
    cast back to a concrete dtype after transposing it into a frame.
    """
    row = pd.DataFrame(data.loc[final_row, list(features)]).T
    for column in features:
        row[column] = row[column].astype(_feature_dtype(column))
    return row


def _prediction_summary(green_proba, res1: pd.DataFrame, prev_close) -> pd.DataFrame:
    """Summarise the new prediction against the walk-forward history.

    ``res1['Predicted']`` is used as the historical probability of a green
    day and ``res1['True']`` as the realised outcome.

    Args:
        green_proba: Predicted probability of a green day for the new row.
        res1: Walk-forward results with ``Predicted`` and ``True`` columns.
        prev_close: Previous close, shown formatted to two decimals.

    Returns:
        A single-column (``Outputs``) frame with the previous close, the
        confidence score, the historical success rate and the number of
        historical observations the success rate is based on.

    Raises:
        ValueError: If ``green_proba`` is NaN.
    """
    if np.isnan(green_proba):
        raise ValueError('Model returned NaN instead of a probability.')

    red_proba = 1 - green_proba
    predicted = res1['Predicted']

    if (green_proba > _NEUTRAL_LOW) and (green_proba <= _NEUTRAL_HIGH):
        badge, operator, score = _BADGE_NEUTRAL, '', green_proba
        similar = (predicted > _NEUTRAL_LOW) & (predicted <= _NEUTRAL_HIGH)
        success_rate = res1.loc[similar, 'True'].mean()
        cutoff_fmt = ''
    elif green_proba > red_proba:
        badge, operator, score = _BADGE_GREEN, '>=', green_proba
        # Past days that were called at least this green; how often were they?
        similar = predicted >= green_proba
        success_rate = res1.loc[similar, 'True'].mean()
        cutoff_fmt = f'{green_proba:.1%}'
    else:
        badge, operator, score = _BADGE_RED, '<=', red_proba
        # 'Predicted' is a green probability, so "at least this red" means
        # "at most this green". The original compared against red_proba,
        # which swept in nearly the whole history for a confident red call.
        similar = predicted <= green_proba
        success_rate = 1 - res1.loc[similar, 'True'].mean()
        # Show the cutoff that was actually applied to 'Predicted'.
        cutoff_fmt = f'{green_proba:.1%}'

    num_obs = int(similar.sum())

    return pd.DataFrame(
        {
            'Outputs': [
                f"{prev_close:.2f}",
                f'{badge} {score:.1%}',
                f'{success_rate:.1%}',
                num_obs,
            ],
        },
        index=[
            'PrevClose',
            'Confidence Score',
            'Success Rate',
            f'NumObs {operator} {cutoff_fmt}',
        ],
    )


def _bucket_table(res1: pd.DataFrame) -> pd.DataFrame:
    """Share and count of green days per predicted-probability bucket."""
    buckets = pd.cut(res1['Predicted'], bins=_BUCKET_EDGES, labels=_BUCKET_LABELS)
    # String aliases and observed=False pin the behaviour pandas currently
    # applies to np.mean/np.sum and to categorical groupers (all five buckets
    # are listed, including empty ones) without the FutureWarnings.
    table = res1.groupby(buckets, observed=False).agg({'True': ['mean', len, 'sum']})
    table.columns = ['PctGreen', 'NumObs', 'NumGreen']
    return table


def _classification_metrics(frame: pd.DataFrame) -> List[float]:
    """Return ``[N, ROC AUC, precision, recall]`` for a results frame.

    Metrics that are undefined for the subset (no rows, or only one outcome
    class for ROC AUC) are reported as NaN instead of raising, so a thin
    high-confidence subset does not take the whole page down.
    """
    if frame.empty:
        return [0, np.nan, np.nan, np.nan]

    y_true = frame['True'].astype(int)
    y_pred = frame['Predicted'] > 0.5
    auc = (
        roc_auc_score(y_true, frame['Predicted'].values)
        if y_true.nunique() > 1
        else np.nan
    )
    return [
        len(frame),
        auc,
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
    ]


def _performance_table(res1: pd.DataFrame) -> pd.DataFrame:
    """Compare metrics over all rows with those outside the neutral band."""
    confident = res1.loc[
        (res1['Predicted'] > _NEUTRAL_HIGH) | (res1['Predicted'] <= _NEUTRAL_LOW)
    ]
    return pd.DataFrame(
        {
            'All': _classification_metrics(res1),
            'High Confidence': _classification_metrics(confident),
        },
        index=['N', 'ROC AUC', 'Precision', 'Recall'],
    ).round(2)


def _accuracy_badge(actual, proba) -> str:
    """Mark one historical prediction as right, wrong or neutral."""
    called_green = proba > _NEUTRAL_HIGH
    called_red = proba <= _NEUTRAL_LOW
    if not (called_green or called_red):
        return _BADGE_NEUTRAL
    # Equality (not identity) on purpose: 'True' may hold numpy bools or 0/1.
    if actual == True:  # noqa: E712
        return _BADGE_RIGHT if called_green else _BADGE_WRONG
    if actual == False:  # noqa: E712
        return _BADGE_RIGHT if called_red else _BADGE_WRONG
    return _BADGE_NEUTRAL


def _daily_performance(res1: pd.DataFrame) -> pd.DataFrame:
    """Copy of ``res1`` with a per-row ``Accuracy`` badge column."""
    daily = res1.copy()
    daily['Accuracy'] = [
        _accuracy_badge(actual, proba)
        for actual, proba in zip(daily['True'], daily['Predicted'])
    ]
    return daily


def run_model(spec: ModelSpec) -> None:
    """Load data, train, predict and render the four result tabs for ``spec``."""
    # Imported lazily so only the selected variant's module is loaded.
    model = importlib.import_module(spec.module)

    with st.spinner('Loading data...'):
        data, df_final, final_row = model.get_data()

    with st.spinner("Training models..."):
        res1, xgbr, seq2 = model.walk_forward_validation_seq(
            df_final.dropna(), 'Target_clf', 'Target', 100, 1
        )

    with st.spinner("Getting new prediction..."):
        new_pred = _latest_features(data, final_row, spec.features)
        # The header is dated one business day after the last data row.
        curr_date = (final_row + BDay(1)).strftime('%Y-%m-%d')

    st.success("โœ… All done!")
    tab1, tab2, tab3, tab4 = st.tabs(
        ["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"]
    )

    green_proba = model.seq_predict_proba(new_pred, xgbr, seq2)[0]
    results = _prediction_summary(green_proba, res1, data.loc[final_row, 'Close'])
    df_probas = _bucket_table(res1)
    df_performance = _performance_table(res1)
    perf_daily = _daily_performance(res1)
    if spec.newest_first:
        perf_daily = perf_daily.sort_index(ascending=False)

    tab1.subheader(f'Pred for {curr_date} as of {spec.as_of}')
    tab1.write(results)
    tab1.write(df_probas)

    tab2.subheader('Latest Data for Pred')
    tab2.write(new_pred)

    tab3.subheader('Historical Data')
    tab3.write(df_final)

    tab4.subheader('Performance')
    tab4.write(df_performance)
    tab4.write(perf_daily)


# --- Page ------------------------------------------------------------------
st.set_page_config(
    page_title="Gameday Model for $SPX",
    page_icon="๐ŸŽฎ"
)

st.title('๐ŸŽฎ Gameday Model for $SPX')
st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation and information about model coming soon.')

if st.button("๐Ÿงน Clear All"):
    st.cache_data.clear()

# Currently unused; kept so the page layout is unchanged.
col1, col2 = st.columns(2)

option = st.selectbox(
    'Select a model, then run.',
    ('',) + tuple(MODELS))

if option == '':
    st.write('Gotta pick one.')
elif st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run'):
    run_model(MODELS[option])