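# NOTE: "Spaces: Sleeping Sleeping" appeared here as Hugging Face Spaces
# page-header residue from a web capture; it is not part of the application.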
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
from sklearn.metrics import roc_auc_score, precision_score, recall_score | |
from pandas.tseries.offsets import BDay | |
# Browser-tab metadata for the app.
_page_settings = {
    "page_title": "Gameday $SPX",
    "page_icon": "๐ฎ",
}
st.set_page_config(**_page_settings)

# Page heading followed by the usage note that links to the README.
st.title('๐ฎ Gameday Model for $SPX')
_usage_note = (
    '**PLEASE NOTE:** Model should be run at or after market open. '
    'Documentation on the model and its features '
    '[can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)'
)
st.markdown(_usage_note)
# Model picker form: a time slider on the left, Run / Clear buttons on the right.
# NOTE(review): indentation was lost in this copy of the file, so where the
# original `with st.form(...)` block ended cannot be seen — confirm whether the
# results below were meant to render inside the form container.
with st.form("choose_model"):
    col1, col2 = st.columns(2)

    with col1:
        # The selected stop decides which model module is run further down.
        option = st.select_slider(
            'Slide the scale based on PST, then run.',
            ['06:30', '07:00', '07:30', '08:00'],
        )

    with col2:
        submitted = st.form_submit_button('๐๐ฝโโ๏ธ Run', use_container_width=True)
        cleared = st.form_submit_button('๐งน Clear All', use_container_width=True)

# "Clear All" only drops Streamlit's data cache.
if cleared:
    st.cache_data.clear()

# The former `if option == ''` fallback was removed: the slider always returns
# one of its four stops, none of which is the empty string.
# Feature columns pulled from the latest row of `data` for the new prediction.
# The 06:30 model uses the leading + trailing groups; the later models insert
# the intraday group between them.
_LEADING_FEATURES = [
    'BigNewsDay',
    'Quarter',
    'Perf5Day',
    'Perf5Day_n1',
    'DaysGreen',
    'DaysRed',
]
_INTRADAY_FEATURES = [
    'CurrentHigh30toClose',
    'CurrentLow30toClose',
    'CurrentClose30toClose',
    'CurrentRange30',
    'GapFill30',
]
_TRAILING_FEATURES = [
    'CurrentGap',
    'RangePct',
    'RangePct_n1',
    'RangePct_n2',
    'OHLC4_VIX',
    'OHLC4_VIX_n1',
    'OHLC4_VIX_n2',
]
_OPEN_FEATURES = _LEADING_FEATURES + _TRAILING_FEATURES
_INTRADAY_MODEL_FEATURES = _LEADING_FEATURES + _INTRADAY_FEATURES + _TRAILING_FEATURES

# Slider stop -> (model module, CSV name offered for download, feature columns).
_MODEL_SPECS = {
    '06:30': ('model_day', 'performance_for_open_model.csv', _OPEN_FEATURES),
    '07:00': ('model_30m', 'performance_for_30m_model.csv', _INTRADAY_MODEL_FEATURES),
    '07:30': ('model_1h', 'performance_for_1h_model.csv', _INTRADAY_MODEL_FEATURES),
    '08:00': ('model_90m', 'performance_for_90m_model.csv', _INTRADAY_MODEL_FEATURES),
}

# Every feature is cast to float except the ones listed here.
_NON_FLOAT_DTYPES = {'Quarter': int, 'Perf5Day': bool, 'Perf5Day_n1': bool}

if submitted:
    # Local import: only needed once a run has been requested.
    import importlib

    _module_name, fname, _feature_cols = _MODEL_SPECS[option]

    # Replaces the per-branch `from model_x import *`: the helpers the script
    # calls (get_data, walk_forward_validation_seq, seq_predict_proba) are
    # reached through the module object instead of the global namespace.
    _model = importlib.import_module(_module_name)

    with st.spinner('Loading data...'):
        data, df_final, final_row = _model.get_data()

    with st.spinner("Training models..."):
        res1, xgbr, seq2 = _model.walk_forward_validation_seq(
            df_final.dropna(), 'Target_clf', 'Target', 100, 1
        )

    with st.spinner("Getting new prediction..."):
        # Selecting one row yields a Series; turn it back into a one-row frame.
        new_pred = pd.DataFrame(data.loc[final_row, _feature_cols]).T

        # The prediction is labelled with the business day after `final_row`.
        curr_date = (final_row + BDay(1)).strftime('%Y-%m-%d')

        # Same per-column casts as before, applied in one call.
        new_pred = new_pred.astype(
            {col: _NON_FLOAT_DTYPES.get(col, float) for col in _feature_cols}
        )

        seq_proba = _model.seq_predict_proba(new_pred, xgbr, seq2)
# NOTE(review): indentation was lost in this copy of the file. These statements
# read `seq_proba` and `res1`, which are only assigned after the form has been
# submitted, so they presumably belong under the `if submitted:` branch.
st.success(f"All done for {option}!", icon="โ ")

# The first element of seq_proba is used as the green-day probability.
green_proba = seq_proba[0]
red_proba = 1 - green_proba

# Probabilities in (0.4, 0.6] are treated as "no call".
do_not_play = (green_proba > 0.4) and (green_proba <= 0.6)

if do_not_play:
    text_cond = '๐จ'
    operator = ''
    score = green_proba
    # Past predictions that also fell inside the no-call band.
    cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
    historical_proba = res1.loc[cond, 'True'].mean()
elif green_proba > red_proba:
    # Green call: compare against past predictions at least this high.
    text_cond = '๐ฉ'
    operator = '>='
    score = green_proba
    cond = (res1['Predicted'] >= green_proba)
    # Share of those past days whose 'True' outcome was set.
    historical_proba = res1.loc[cond, 'True'].mean()
else:
    # Red call: compare against past predictions at least this low.
    text_cond = '๐ฅ'
    operator = '<='
    score = red_proba
    cond = (res1['Predicted'] <= green_proba)
    # Complement of the 'True' share, i.e. how often the red call held.
    historical_proba = 1 - res1.loc[cond, 'True'].mean()

# Number of past predictions matching the chosen condition.
num_obs = len(res1.loc[cond])

score_fmt = f'{score:.1%}'
results = pd.DataFrame(index=[
    'PrevClose',
    'Confidence Score',
    'Success Rate',
    f'NumObs {operator} {"" if do_not_play else score_fmt}',
], data=[
    f"{data.loc[final_row,'Close']:.2f}",
    f'{text_cond} {score:.1%}',
    f'{historical_proba:.1%}',
    num_obs,
])
results.columns = ['Outputs']
def _binary_scores(frame):
    """Return (row count, ROC AUC, precision, recall) for a predictions frame.

    `frame` needs a 'True' column (cast to int for scoring) and a 'Predicted'
    column; predictions above 0.5 count as the positive class for precision
    and recall.
    """
    actual = frame['True'].astype(int)
    called_positive = frame['Predicted'] > 0.5
    return (
        len(frame),
        roc_auc_score(actual, frame['Predicted'].values),
        precision_score(actual, called_positive),
        recall_score(actual, called_positive),
    )


# Labels for the five probability buckets used in the breakdown table.
int_labels = ['(-โ, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โ]']

# Close-over-previous-close return, shifted up one row so each row carries the
# following row's return, then joined onto the validation results by index.
data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1
data['ClosePct'] = data['ClosePct'].shift(-1)
res1 = res1.merge(data['ClosePct'], left_index=True, right_index=True)

# String aggregation names replace np.mean / np.sum, and `observed=False` is
# spelled out: both keep today's results while avoiding pandas deprecations.
_proba_bucket = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
df_probas = res1.groupby(_proba_bucket, observed=False).agg(
    {'True': ['mean', len, 'sum'], 'ClosePct': ['mean']}
)
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen', 'AvgPerf']
df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')

# Scores over every validation row ...
len_all, roc_auc_score_all, precision_score_all, recall_score_all = _binary_scores(res1)

# ... and over the rows outside the (0.4, 0.6] no-call band only.
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
len_hi, roc_auc_score_hi, precision_score_hi, recall_score_hi = _binary_scores(res2_filtered)

df_performance = pd.DataFrame(
    index=[
        'N',
        'ROC AUC',
        'Precision',
        'Recall'
    ],
    columns=[
        'All',
        'High Confidence'
    ],
    data=[
        [len_all, len_hi],
        [roc_auc_score_all, roc_auc_score_hi],
        [precision_score_all, precision_score_hi],
        [recall_score_all, recall_score_hi]
    ]
).round(2)
def _grade_call(t, p, lo, hi):
    """Grade one prediction against its outcome.

    Returns True when the call was correct, False when it was incorrect, and
    None when no call was made (`p` lies in (lo, hi]) or `t` equals neither
    True nor False.
    """
    if p <= lo:
        called_green = False
    elif p > hi:
        called_green = True
    else:
        return None

    if t == called_green:
        return True
    if t == (not called_green):
        return False
    return None


def get_acc(t, p, lo=0.4, hi=0.6):
    """Return the emoji marker for how prediction `p` fared against outcome `t`.

    Args:
        t: Actual outcome, compared against True / False.
        p: Predicted probability.
        lo: Probabilities at or below this count as a negative call.
        hi: Probabilities above this count as a positive call.
    """
    grade = _grade_call(t, p, lo, hi)
    if grade is None:
        return '๐จ'
    return 'โ ' if grade else 'โ'


def get_acc_text(t, p, lo=0.4, hi=0.6):
    """Return 'Correct', 'Incorrect' or 'No Action' for prediction `p` vs outcome `t`.

    Takes the same arguments and applies the same thresholds as `get_acc`.
    """
    grade = _grade_call(t, p, lo, hi)
    if grade is None:
        return 'No Action'
    return 'Correct' if grade else 'Incorrect'
# Per-day scorecard built from a copy of the validation results.
perf_daily = res1.copy()

# Each row is labelled with the business day after its index date.
perf_daily['TargetDate'] = perf_daily.index + BDay(1)

# Grade every (outcome, prediction) pair, once as an emoji and once as text.
_graded_pairs = list(zip(perf_daily['True'], perf_daily['Predicted']))
perf_daily['Accuracy'] = [get_acc(actual, proba) for actual, proba in _graded_pairs]
perf_daily['AccuracyText'] = [get_acc_text(actual, proba) for actual, proba in _graded_pairs]

# Predictions at or below 0.4 are flipped to 1 - p; all others are kept as-is.
perf_daily['ConfidenceScore'] = [
    1 - proba if proba <= 0.4 else proba
    for proba in perf_daily['Predicted']
]

_scorecard_columns = [
    'TargetDate',
    'Predicted',
    'True',
    'Accuracy',
    'AccuracyText',
    'ConfidenceScore',
]
perf_daily = perf_daily[_scorecard_columns]
def convert_df(df):
    """Return `df` serialized as CSV text using ``DataFrame.to_csv`` defaults.

    NOTE(review): nothing here caches the result, so the conversion runs on
    every call; add a caching decorator if that was the intent.
    """
    csv_text = df.to_csv()
    return csv_text
# CSV payload for the download button.
csv = convert_df(perf_daily)

tab1, tab2, tab3, tab4 = st.tabs(["๐ฎ Prediction", "โจ New Data", "๐ Historical", "๐ Performance"])

# One-day expected move: Close scaled by Close_VIX (as a percentage) and by
# sqrt(1) / sqrt(252).
_daily_em = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))

# Taken before any shifting: the move from the latest row (forward) and from
# the row before it (current).
fwd_em, curr_em = _daily_em.iloc[-1], _daily_em.iloc[-2]

# (row label in df_em, column-name suffix, multiplier applied to the base move)
_EM_BANDS = (
    ('EM 1X', '', 1),
    ('EM 1.25X', '_125', 1.25),
    ('EM 1.5X', '_15', 1.5),
)

df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
for _label, _suffix, _multiplier in _EM_BANDS:
    _em_col = f'VIX_EM{_suffix}'
    _high_col = f'{_em_col}_High'
    _low_col = f'{_em_col}_Low'

    # Each row's band is built from the previous row's close and move, hence
    # the one-row shift.
    _move = _daily_em * _multiplier
    data[_em_col] = _move.shift(1)
    data[_high_col] = (data['Close'] + _move).shift(1)
    data[_low_col] = (data['Close'] - _move).shift(1)

    # Share of rows whose Close stayed inside the band, and share whose
    # High or Low went beyond it.
    _within = len(data.query(f'Close <= {_high_col} & Close >= {_low_col}')) / len(data)
    _tested = len(data.query(f'High > {_high_col} | Low < {_low_col}')) / len(data)

    df_em.loc[_label] = [
        data[_em_col].iloc[-1].round(2),
        data[_low_col].iloc[-1].round(2),
        data[_high_col].iloc[-1].round(2),
        f"{_within:.1%}",
        f"{_tested:.1%}",
    ]
# Prediction tab: headline, result tables and the expected-move summary.
with tab1:
    st.subheader(f'{option} on {curr_date}')
    for _table in (results, df_probas):
        st.write(_table)
    st.text(f'VIX EM ({curr_em:.2f} / {fwd_em:.2f})')
    st.write(df_em)

# The next two tabs each show a heading followed by a single frame.
_simple_tabs = (
    (tab2, 'Latest Data for Pred', new_pred),
    (tab3, 'Historical Data', df_final),
)
for _tab, _heading, _frame in _simple_tabs:
    with _tab:
        st.subheader(_heading)
        st.write(_frame)

# Performance tab: summary scores plus the ten most recent graded days.
with tab4:
    st.subheader('Performance')
    st.write(df_performance)
    st.text('Performance last 10 days (download for all)')
    _recent_columns = ['TargetDate', 'Predicted', 'True', 'Accuracy']
    _recent_days = perf_daily[_recent_columns].iloc[-10:]
    st.write(_recent_days)
# Offer the full scorecard as a CSV once a run has been submitted.
# NOTE(review): indentation was lost in this copy; the caption is assumed to
# sit inside the same `if` as the button — confirm against the original.
if submitted:
    _download_kwargs = {
        "label": "Download Historical Performance",
        "data": csv,
        "file_name": fname,
    }
    st.download_button(**_download_kwargs)
    st.caption('โ ๏ธ Downloading the CSV will reload the page. โ ๏ธ')