gamedayspx / app.py
wnstnb's picture
add avg perg and vix em
3f1f433
raw
history blame
52.5 kB
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, precision_score, recall_score
from pandas.tseries.offsets import BDay
st.set_page_config(
page_title="Gameday Model for $SPX",
page_icon="๐ŸŽฎ"
)
st.title('๐ŸŽฎ Gameday Model for $SPX')
st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation on the model and its features [can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)')
with st.form("choose_model"):
option = st.selectbox(
'Select a model, then run.',
('', '๐ŸŒž At Open', 'โŒš 30 Mins', 'โณ 60 Mins', '๐Ÿ•ฐ 90 Mins'))
col1, col2 = st.columns(2)
with col1:
submitted = st.form_submit_button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run',use_container_width=True)
with col2:
cleared = st.form_submit_button('๐Ÿงน Clear All',use_container_width=True)
if cleared:
st.cache_data.clear()
if option == '':
st.write('No model selected.')
if submitted:
if option == '๐ŸŒž At Open':
# runday = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
# if runday:
from model_day import *
with st.spinner('Loading data...'):
data, df_final, final_row = get_data()
# st.success("โœ… Historical data")
with st.spinner("Training models..."):
def train_models():
    """Run the walk-forward validation over the complete rows of ``df_final``.

    Rows containing missing values are dropped first. The three values
    produced by ``walk_forward_validation_seq`` are handed back unchanged
    and in the same order.
    """
    complete_rows = df_final.dropna()
    wf_results, first_model, second_model = walk_forward_validation_seq(
        complete_rows, 'Target_clf', 'Target', 100, 1
    )
    return wf_results, first_model, second_model
res1, xgbr, seq2 = train_models()
# st.success("โœ… Models trained")
with st.spinner("Getting new prediction..."):
# Get last row
new_pred = data.loc[final_row, ['BigNewsDay',
'Quarter',
'Perf5Day',
'Perf5Day_n1',
'DaysGreen',
'DaysRed',
'CurrentGap',
'RangePct',
'RangePct_n1',
'RangePct_n2',
'OHLC4_VIX',
'OHLC4_VIX_n1',
'OHLC4_VIX_n2']]
new_pred = pd.DataFrame(new_pred).T
# new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
# last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
curr_date = final_row + BDay(1)
curr_date = curr_date.strftime('%Y-%m-%d')
new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
new_pred['Quarter'] = new_pred['Quarter'].astype(int)
new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
new_pred['RangePct'] = new_pred['RangePct'].astype(float)
new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
st.success("โœ… All done!")
tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
green_proba = seq_proba[0]
red_proba = 1 - green_proba
do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
stdev = 0.01
score = None
num_obs = None
cond = None
historical_proba = None
text_cond = None
operator = None
# Choose the headline verdict for the new prediction and look up how
# comparable historical predictions in res1 turned out.
if do_not_play:
# Neutral zone (0.4, 0.6]: show the raw green probability and compare
# against every historical prediction that fell in the same zone.
text_cond = '๐ŸŸจ'
operator = ''
score = seq_proba[0]
cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
num_obs = len(res1.loc[cond])
# Mean of the 'True' flag over the neutral-zone rows.
historical_proba = res1.loc[cond, 'True'].mean()
elif green_proba > red_proba:
# The day is predicted to be green: the score is the green probability.
text_cond = '๐ŸŸฉ'
operator = '>='
score = green_proba
# How many historical predictions were at least this high?
cond = (res1['Predicted'] >= green_proba)
num_obs = len(res1.loc[cond])
# How often were those rows actually green (mean of 'True')?
historical_proba = res1.loc[cond, 'True'].mean()
# print(cond)
elif green_proba <= red_proba:
# The day is predicted to be red: the score is the red probability.
text_cond = '๐ŸŸฅ'
operator = '<='
score = red_proba
# How many historical predictions were at least this low?
cond = (res1['Predicted'] <= seq_proba[0])
num_obs = len(res1.loc[cond])
# How often were those rows NOT green (1 - mean of 'True')?
historical_proba = 1 - res1.loc[cond, 'True'].mean()
# print(cond)
score_fmt = f'{score:.1%}'
results = pd.DataFrame(index=[
'PrevClose',
'Confidence Score',
'Success Rate',
f'NumObs {operator} {"" if do_not_play else score_fmt}',
], data = [
f"{data.loc[final_row,'Close']:.2f}",
f'{text_cond} {score:.1%}',
f'{historical_proba:.1%}',
num_obs,
])
results.columns = ['Outputs']
# st.subheader('New Prediction')
int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
# df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
# Percentage change of Close versus PrevClose, shifted back one row so that
# each row carries the change recorded on the row that follows it.
data['ClosePct'] = ((data['Close'] / data['PrevClose']) - 1).shift(-1)
res1 = res1.merge(data['ClosePct'], left_index=True, right_index=True)
# Bucket the historical predictions into the five fixed probability bands.
proba_bands = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
# String aliases replace np.mean/np.sum (passing those callables to agg is
# deprecated in recent pandas) and observed=False is spelled out so empty
# bands keep their row instead of relying on a deprecated implicit default.
df_probas = res1.groupby(proba_bands, observed=False).agg(
    {'True': ['mean', len, 'sum'], 'ClosePct': ['mean']}
)
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen', 'AvgPerf']
df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
len_all = len(res1)
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
len_hi = len(res2_filtered)
df_performance = pd.DataFrame(
index=[
'N',
'ROC AUC',
'Precision',
'Recall'
],
columns = [
'All',
'High Confidence'
],
data = [
[len_all, len_hi],
[roc_auc_score_all, roc_auc_score_hi],
[precision_score_all, precision_score_hi],
[recall_score_all, recall_score_hi]
]
).round(2)
def get_acc(t, p):
    """Return the marker string grading one historical prediction.

    ``p`` at or below 0.4 is treated as a "not green" call and ``p`` above
    0.6 as a "green" call. The first marker is returned when ``t`` agrees
    with the call, the second when it disagrees, and the third when ``p``
    lies in between (or ``t`` equals neither True nor False).
    """
    if p <= 0.4:
        agrees, disagrees = (t == False), (t == True)
    elif p > 0.6:
        agrees, disagrees = (t == True), (t == False)
    else:
        agrees = disagrees = False

    if agrees:
        return 'โœ…'
    if disagrees:
        return 'โŒ'
    return '๐ŸŸจ'
def get_acc_text(t, p):
    """Return a plain-text grade for one historical prediction.

    ``p`` at or below 0.4 counts as a "not green" call and ``p`` above 0.6
    as a "green" call. The result is 'Correct' when ``t`` matches the call,
    'Incorrect' when it is the opposite, and 'No Action' otherwise.
    """
    if p <= 0.4:
        called_green = False
    elif p > 0.6:
        called_green = True
    else:
        return 'No Action'

    if t == called_green:
        return 'Correct'
    if t == (not called_green):
        return 'Incorrect'
    return 'No Action'
perf_daily = res1.copy()
perf_daily['TargetDate'] = perf_daily.index + BDay(1)
perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
def convert_df(df):
    """Serialise ``df`` to CSV text using the ``to_csv`` defaults.

    NOTE: nothing in this function caches the result; the conversion is
    redone on every call.
    """
    csv_text = df.to_csv()
    return csv_text
csv = convert_df(perf_daily)
check = data.tail(1)
# One-day move derived from Close and Close_VIX (VIX/100 scaled by
# sqrt(1)/sqrt(252)), plus 1.25x and 1.5x variants. Every column is shifted
# down one row so a row holds the band computed from the row before it.
_em_base = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))
_em_bands = (
    ('EM 1X', 'VIX_EM', None),
    ('EM 1.25X', 'VIX_EM_125', 1.25),
    (f"EM 1.5X", 'VIX_EM_15', 1.5),
)
for _em_label, _em_col, _em_mult in _em_bands:
    _em_move = _em_base if _em_mult is None else _em_base * _em_mult
    data[_em_col] = _em_move.shift(1)
    data[f'{_em_col}_High'] = (data['Close'] + _em_move).shift(1)
    data[f'{_em_col}_Low'] = (data['Close'] - _em_move).shift(1)

# Summary table: latest band values, the share of rows whose Close stayed
# inside the band, and the share whose High/Low went beyond it.
df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
_em_rows = len(data)
for _em_label, _em_col, _em_mult in _em_bands:
    _em_low, _em_high = f'{_em_col}_Low', f'{_em_col}_High'
    _em_inside = (data['Close'] <= data[_em_high]) & (data['Close'] >= data[_em_low])
    _em_tested = (data['High'] > data[_em_high]) | (data['Low'] < data[_em_low])
    df_em.loc[_em_label] = [
        data[_em_col].iloc[-1].round(2),
        data[_em_low].iloc[-1].round(2),
        data[_em_high].iloc[-1].round(2),
        f"{_em_inside.sum() / _em_rows:.1%}",
        f"{_em_tested.sum() / _em_rows:.1%}",
    ]
with tab1:
st.subheader(f'Pred for {curr_date} as of 6:30AM PST')
st.write(results)
st.write(df_probas)
st.text('VIX EM')
st.write(df_em)
with tab2:
st.subheader('Latest Data for Pred')
st.write(new_pred)
with tab3:
st.subheader('Historical Data')
st.write(df_final)
with tab4:
st.subheader('Performance')
st.write(df_performance)
st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
# st.download_button(
# label="Download Historical Performance",
# data=csv,
fname='performance_for_at_open_model.csv'
# )
elif option == 'โŒš 30 Mins':
# run30 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
# if run30:
from model_30m import *
with st.spinner('Loading data...'):
data, df_final, final_row = get_data()
# st.success("โœ… Historical data")
with st.spinner("Training models..."):
def train_models():
    """Run the walk-forward validation over the complete rows of ``df_final``.

    Rows containing missing values are dropped first. The three values
    produced by ``walk_forward_validation_seq`` are handed back unchanged
    and in the same order.
    """
    complete_rows = df_final.dropna()
    wf_results, first_model, second_model = walk_forward_validation_seq(
        complete_rows, 'Target_clf', 'Target', 100, 1
    )
    return wf_results, first_model, second_model
res1, xgbr, seq2 = train_models()
# st.success("โœ… Models trained")
with st.spinner("Getting new prediction..."):
# Get last row
new_pred = data.loc[final_row, ['BigNewsDay',
'Quarter',
'Perf5Day',
'Perf5Day_n1',
'DaysGreen',
'DaysRed',
'CurrentHigh30toClose',
'CurrentLow30toClose',
'CurrentClose30toClose',
'CurrentRange30',
'GapFill30',
'CurrentGap',
'RangePct',
'RangePct_n1',
'RangePct_n2',
'OHLC4_VIX',
'OHLC4_VIX_n1',
'OHLC4_VIX_n2']]
new_pred = pd.DataFrame(new_pred).T
# new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
# last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
curr_date = final_row + BDay(1)
curr_date = curr_date.strftime('%Y-%m-%d')
new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
new_pred['Quarter'] = new_pred['Quarter'].astype(int)
new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
new_pred['RangePct'] = new_pred['RangePct'].astype(float)
new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
st.success("โœ… All done!")
tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
green_proba = seq_proba[0]
red_proba = 1 - green_proba
do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
stdev = 0.01
score = None
num_obs = None
cond = None
historical_proba = None
text_cond = None
operator = None
# Choose the headline verdict for the new prediction and look up how
# comparable historical predictions in res1 turned out.
if do_not_play:
# Neutral zone (0.4, 0.6]: show the raw green probability and compare
# against every historical prediction that fell in the same zone.
text_cond = '๐ŸŸจ'
operator = ''
score = seq_proba[0]
cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
num_obs = len(res1.loc[cond])
# Mean of the 'True' flag over the neutral-zone rows.
historical_proba = res1.loc[cond, 'True'].mean()
elif green_proba > red_proba:
# The day is predicted to be green: the score is the green probability.
text_cond = '๐ŸŸฉ'
operator = '>='
score = green_proba
# How many historical predictions were at least this high?
cond = (res1['Predicted'] >= green_proba)
num_obs = len(res1.loc[cond])
# How often were those rows actually green (mean of 'True')?
historical_proba = res1.loc[cond, 'True'].mean()
# print(cond)
elif green_proba <= red_proba:
# The day is predicted to be red: the score is the red probability.
text_cond = '๐ŸŸฅ'
operator = '<='
score = red_proba
# How many historical predictions were at least this low?
cond = (res1['Predicted'] <= seq_proba[0])
num_obs = len(res1.loc[cond])
# How often were those rows NOT green (1 - mean of 'True')?
historical_proba = 1 - res1.loc[cond, 'True'].mean()
# print(cond)
score_fmt = f'{score:.1%}'
results = pd.DataFrame(index=[
'PrevClose',
'Confidence Score',
'Success Rate',
f'NumObs {operator} {"" if do_not_play else score_fmt}',
], data = [
f"{data.loc[final_row,'Close']:.2f}",
f'{text_cond} {score:.1%}',
f'{historical_proba:.1%}',
num_obs,
])
results.columns = ['Outputs']
# st.subheader('New Prediction')
int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
# df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
# Percentage change of Close versus PrevClose, shifted back one row so that
# each row carries the change recorded on the row that follows it.
data['ClosePct'] = ((data['Close'] / data['PrevClose']) - 1).shift(-1)
res1 = res1.merge(data['ClosePct'], left_index=True, right_index=True)
# Bucket the historical predictions into the five fixed probability bands.
proba_bands = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
# String aliases replace np.mean/np.sum (passing those callables to agg is
# deprecated in recent pandas) and observed=False is spelled out so empty
# bands keep their row instead of relying on a deprecated implicit default.
df_probas = res1.groupby(proba_bands, observed=False).agg(
    {'True': ['mean', len, 'sum'], 'ClosePct': ['mean']}
)
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen', 'AvgPerf']
df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
len_all = len(res1)
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
len_hi = len(res2_filtered)
df_performance = pd.DataFrame(
index=[
'N',
'ROC AUC',
'Precision',
'Recall'
],
columns = [
'All',
'High Confidence'
],
data = [
[len_all, len_hi],
[roc_auc_score_all, roc_auc_score_hi],
[precision_score_all, precision_score_hi],
[recall_score_all, recall_score_hi]
]
).round(2)
def get_acc(t, p):
    """Return the marker string grading one historical prediction.

    ``p`` at or below 0.4 is treated as a "not green" call and ``p`` above
    0.6 as a "green" call. The first marker is returned when ``t`` agrees
    with the call, the second when it disagrees, and the third when ``p``
    lies in between (or ``t`` equals neither True nor False).
    """
    if p <= 0.4:
        agrees, disagrees = (t == False), (t == True)
    elif p > 0.6:
        agrees, disagrees = (t == True), (t == False)
    else:
        agrees = disagrees = False

    if agrees:
        return 'โœ…'
    if disagrees:
        return 'โŒ'
    return '๐ŸŸจ'
def get_acc_text(t, p):
    """Return a plain-text grade for one historical prediction.

    ``p`` at or below 0.4 counts as a "not green" call and ``p`` above 0.6
    as a "green" call. The result is 'Correct' when ``t`` matches the call,
    'Incorrect' when it is the opposite, and 'No Action' otherwise.
    """
    if p <= 0.4:
        called_green = False
    elif p > 0.6:
        called_green = True
    else:
        return 'No Action'

    if t == called_green:
        return 'Correct'
    if t == (not called_green):
        return 'Incorrect'
    return 'No Action'
perf_daily = res1.copy()
perf_daily['TargetDate'] = perf_daily.index + BDay(1)
perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
def convert_df(df):
    """Serialise ``df`` to CSV text using the ``to_csv`` defaults.

    NOTE: nothing in this function caches the result; the conversion is
    redone on every call.
    """
    csv_text = df.to_csv()
    return csv_text
csv = convert_df(perf_daily)
check = data.tail(1)
# One-day move derived from Close and Close_VIX (VIX/100 scaled by
# sqrt(1)/sqrt(252)), plus 1.25x and 1.5x variants. Every column is shifted
# down one row so a row holds the band computed from the row before it.
_em_base = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))
_em_bands = (
    ('EM 1X', 'VIX_EM', None),
    ('EM 1.25X', 'VIX_EM_125', 1.25),
    (f"EM 1.5X", 'VIX_EM_15', 1.5),
)
for _em_label, _em_col, _em_mult in _em_bands:
    _em_move = _em_base if _em_mult is None else _em_base * _em_mult
    data[_em_col] = _em_move.shift(1)
    data[f'{_em_col}_High'] = (data['Close'] + _em_move).shift(1)
    data[f'{_em_col}_Low'] = (data['Close'] - _em_move).shift(1)

# Summary table: latest band values, the share of rows whose Close stayed
# inside the band, and the share whose High/Low went beyond it.
df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
_em_rows = len(data)
for _em_label, _em_col, _em_mult in _em_bands:
    _em_low, _em_high = f'{_em_col}_Low', f'{_em_col}_High'
    _em_inside = (data['Close'] <= data[_em_high]) & (data['Close'] >= data[_em_low])
    _em_tested = (data['High'] > data[_em_high]) | (data['Low'] < data[_em_low])
    df_em.loc[_em_label] = [
        data[_em_col].iloc[-1].round(2),
        data[_em_low].iloc[-1].round(2),
        data[_em_high].iloc[-1].round(2),
        f"{_em_inside.sum() / _em_rows:.1%}",
        f"{_em_tested.sum() / _em_rows:.1%}",
    ]
with tab1:
st.subheader(f'Pred for {curr_date} as of 7AM PST')
st.write(results)
st.write(df_probas)
st.text('VIX EM')
st.write(df_em)
with tab2:
st.subheader('Latest Data for Pred')
st.write(new_pred)
with tab3:
st.subheader('Historical Data')
st.write(df_final)
with tab4:
st.subheader('Performance')
st.write(df_performance)
st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
# st.download_button(
# label="Download Historical Performance",
# data=csv,
fname='performance_for_30m_model.csv'
# )
elif option == 'โณ 60 Mins':
# run60 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
# if run60:
from model_1h import *
with st.spinner('Loading data...'):
data, df_final, final_row = get_data()
# st.success("โœ… Historical data")
with st.spinner("Training models..."):
def train_models():
    """Run the walk-forward validation over the complete rows of ``df_final``.

    Rows containing missing values are dropped first. The three values
    produced by ``walk_forward_validation_seq`` are handed back unchanged
    and in the same order.
    """
    complete_rows = df_final.dropna()
    wf_results, first_model, second_model = walk_forward_validation_seq(
        complete_rows, 'Target_clf', 'Target', 100, 1
    )
    return wf_results, first_model, second_model
res1, xgbr, seq2 = train_models()
# st.success("โœ… Models trained")
with st.spinner("Getting new prediction..."):
# Get last row
new_pred = data.loc[final_row, ['BigNewsDay',
'Quarter',
'Perf5Day',
'Perf5Day_n1',
'DaysGreen',
'DaysRed',
'CurrentHigh30toClose',
'CurrentLow30toClose',
'CurrentClose30toClose',
'CurrentRange30',
'GapFill30',
'CurrentGap',
'RangePct',
'RangePct_n1',
'RangePct_n2',
'OHLC4_VIX',
'OHLC4_VIX_n1',
'OHLC4_VIX_n2']]
new_pred = pd.DataFrame(new_pred).T
# new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
# last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
curr_date = final_row + BDay(1)
curr_date = curr_date.strftime('%Y-%m-%d')
new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
new_pred['Quarter'] = new_pred['Quarter'].astype(int)
new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
new_pred['RangePct'] = new_pred['RangePct'].astype(float)
new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
st.success("โœ… All done!")
tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
green_proba = seq_proba[0]
red_proba = 1 - green_proba
do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
stdev = 0.01
score = None
num_obs = None
cond = None
historical_proba = None
text_cond = None
operator = None
# Choose the headline verdict for the new prediction and look up how
# comparable historical predictions in res1 turned out.
if do_not_play:
# Neutral zone (0.4, 0.6]: show the raw green probability and compare
# against every historical prediction that fell in the same zone.
text_cond = '๐ŸŸจ'
operator = ''
score = seq_proba[0]
cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
num_obs = len(res1.loc[cond])
# Mean of the 'True' flag over the neutral-zone rows.
historical_proba = res1.loc[cond, 'True'].mean()
elif green_proba > red_proba:
# The day is predicted to be green: the score is the green probability.
text_cond = '๐ŸŸฉ'
operator = '>='
score = green_proba
# How many historical predictions were at least this high?
cond = (res1['Predicted'] >= green_proba)
num_obs = len(res1.loc[cond])
# How often were those rows actually green (mean of 'True')?
historical_proba = res1.loc[cond, 'True'].mean()
# print(cond)
elif green_proba <= red_proba:
# The day is predicted to be red: the score is the red probability.
text_cond = '๐ŸŸฅ'
operator = '<='
score = red_proba
# How many historical predictions were at least this low?
cond = (res1['Predicted'] <= seq_proba[0])
num_obs = len(res1.loc[cond])
# How often were those rows NOT green (1 - mean of 'True')?
historical_proba = 1 - res1.loc[cond, 'True'].mean()
# print(cond)
score_fmt = f'{score:.1%}'
results = pd.DataFrame(index=[
'PrevClose',
'Confidence Score',
'Success Rate',
f'NumObs {operator} {"" if do_not_play else score_fmt}',
], data = [
f"{data.loc[final_row,'Close']:.2f}",
f'{text_cond} {score:.1%}',
f'{historical_proba:.1%}',
num_obs,
])
results.columns = ['Outputs']
# st.subheader('New Prediction')
int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
# df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
# Percentage change of Close versus PrevClose, shifted back one row so that
# each row carries the change recorded on the row that follows it.
data['ClosePct'] = ((data['Close'] / data['PrevClose']) - 1).shift(-1)
res1 = res1.merge(data['ClosePct'], left_index=True, right_index=True)
# Bucket the historical predictions into the five fixed probability bands.
proba_bands = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
# String aliases replace np.mean/np.sum (passing those callables to agg is
# deprecated in recent pandas) and observed=False is spelled out so empty
# bands keep their row instead of relying on a deprecated implicit default.
df_probas = res1.groupby(proba_bands, observed=False).agg(
    {'True': ['mean', len, 'sum'], 'ClosePct': ['mean']}
)
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen', 'AvgPerf']
df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
len_all = len(res1)
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
len_hi = len(res2_filtered)
df_performance = pd.DataFrame(
index=[
'N',
'ROC AUC',
'Precision',
'Recall'
],
columns = [
'All',
'High Confidence'
],
data = [
[len_all, len_hi],
[roc_auc_score_all, roc_auc_score_hi],
[precision_score_all, precision_score_hi],
[recall_score_all, recall_score_hi]
]
).round(2)
def get_acc(t, p):
    """Return the marker string grading one historical prediction.

    ``p`` at or below 0.4 is treated as a "not green" call and ``p`` above
    0.6 as a "green" call. The first marker is returned when ``t`` agrees
    with the call, the second when it disagrees, and the third when ``p``
    lies in between (or ``t`` equals neither True nor False).
    """
    if p <= 0.4:
        agrees, disagrees = (t == False), (t == True)
    elif p > 0.6:
        agrees, disagrees = (t == True), (t == False)
    else:
        agrees = disagrees = False

    if agrees:
        return 'โœ…'
    if disagrees:
        return 'โŒ'
    return '๐ŸŸจ'
def get_acc_text(t, p):
    """Return a plain-text grade for one historical prediction.

    ``p`` at or below 0.4 counts as a "not green" call and ``p`` above 0.6
    as a "green" call. The result is 'Correct' when ``t`` matches the call,
    'Incorrect' when it is the opposite, and 'No Action' otherwise.
    """
    if p <= 0.4:
        called_green = False
    elif p > 0.6:
        called_green = True
    else:
        return 'No Action'

    if t == called_green:
        return 'Correct'
    if t == (not called_green):
        return 'Incorrect'
    return 'No Action'
perf_daily = res1.copy()
perf_daily['TargetDate'] = perf_daily.index + BDay(1)
perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
def convert_df(df):
    """Serialise ``df`` to CSV text using the ``to_csv`` defaults.

    NOTE: nothing in this function caches the result; the conversion is
    redone on every call.
    """
    csv_text = df.to_csv()
    return csv_text
csv = convert_df(perf_daily)
check = data.tail(1)
# One-day move derived from Close and Close_VIX (VIX/100 scaled by
# sqrt(1)/sqrt(252)), plus 1.25x and 1.5x variants. Every column is shifted
# down one row so a row holds the band computed from the row before it.
_em_base = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))
_em_bands = (
    ('EM 1X', 'VIX_EM', None),
    ('EM 1.25X', 'VIX_EM_125', 1.25),
    (f"EM 1.5X", 'VIX_EM_15', 1.5),
)
for _em_label, _em_col, _em_mult in _em_bands:
    _em_move = _em_base if _em_mult is None else _em_base * _em_mult
    data[_em_col] = _em_move.shift(1)
    data[f'{_em_col}_High'] = (data['Close'] + _em_move).shift(1)
    data[f'{_em_col}_Low'] = (data['Close'] - _em_move).shift(1)

# Summary table: latest band values, the share of rows whose Close stayed
# inside the band, and the share whose High/Low went beyond it.
df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
_em_rows = len(data)
for _em_label, _em_col, _em_mult in _em_bands:
    _em_low, _em_high = f'{_em_col}_Low', f'{_em_col}_High'
    _em_inside = (data['Close'] <= data[_em_high]) & (data['Close'] >= data[_em_low])
    _em_tested = (data['High'] > data[_em_high]) | (data['Low'] < data[_em_low])
    df_em.loc[_em_label] = [
        data[_em_col].iloc[-1].round(2),
        data[_em_low].iloc[-1].round(2),
        data[_em_high].iloc[-1].round(2),
        f"{_em_inside.sum() / _em_rows:.1%}",
        f"{_em_tested.sum() / _em_rows:.1%}",
    ]
with tab1:
st.subheader(f'Pred for {curr_date} as of 7:30AM PST')
st.write(results)
st.write(df_probas)
st.text('VIX EM')
st.write(df_em)
with tab2:
st.subheader('Latest Data for Pred')
st.write(new_pred)
with tab3:
st.subheader('Historical Data')
st.write(df_final)
with tab4:
st.subheader('Performance')
st.write(df_performance)
st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
# st.download_button(
# label="Download Historical Performance",
# data=csv,
fname='performance_for_60m_model.csv'
# )
elif option == '๐Ÿ•ฐ 90 Mins':
# run60 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
# if run60:
from model_90m import *
with st.spinner('Loading data...'):
data, df_final, final_row = get_data()
# st.success("โœ… Historical data")
with st.spinner("Training models..."):
def train_models():
    """Run the walk-forward validation over the complete rows of ``df_final``.

    Rows containing missing values are dropped first. The three values
    produced by ``walk_forward_validation_seq`` are handed back unchanged
    and in the same order.
    """
    complete_rows = df_final.dropna()
    wf_results, first_model, second_model = walk_forward_validation_seq(
        complete_rows, 'Target_clf', 'Target', 100, 1
    )
    return wf_results, first_model, second_model
res1, xgbr, seq2 = train_models()
# st.success("โœ… Models trained")
with st.spinner("Getting new prediction..."):
# Get last row
new_pred = data.loc[final_row, ['BigNewsDay',
'Quarter',
'Perf5Day',
'Perf5Day_n1',
'DaysGreen',
'DaysRed',
'CurrentHigh30toClose',
'CurrentLow30toClose',
'CurrentClose30toClose',
'CurrentRange30',
'GapFill30',
'CurrentGap',
'RangePct',
'RangePct_n1',
'RangePct_n2',
'OHLC4_VIX',
'OHLC4_VIX_n1',
'OHLC4_VIX_n2']]
new_pred = pd.DataFrame(new_pred).T
# new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
# last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
curr_date = final_row + BDay(1)
curr_date = curr_date.strftime('%Y-%m-%d')
new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
new_pred['Quarter'] = new_pred['Quarter'].astype(int)
new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
new_pred['RangePct'] = new_pred['RangePct'].astype(float)
new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
st.success("โœ… All done!")
tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
green_proba = seq_proba[0]
red_proba = 1 - green_proba
do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
stdev = 0.01
score = None
num_obs = None
cond = None
historical_proba = None
text_cond = None
operator = None
# Choose the headline verdict for the new prediction and look up how
# comparable historical predictions in res1 turned out.
if do_not_play:
# Neutral zone (0.4, 0.6]: show the raw green probability and compare
# against every historical prediction that fell in the same zone.
text_cond = '๐ŸŸจ'
operator = ''
score = seq_proba[0]
cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
num_obs = len(res1.loc[cond])
# Mean of the 'True' flag over the neutral-zone rows.
historical_proba = res1.loc[cond, 'True'].mean()
elif green_proba > red_proba:
# The day is predicted to be green: the score is the green probability.
text_cond = '๐ŸŸฉ'
operator = '>='
score = green_proba
# How many historical predictions were at least this high?
cond = (res1['Predicted'] >= green_proba)
num_obs = len(res1.loc[cond])
# How often were those rows actually green (mean of 'True')?
historical_proba = res1.loc[cond, 'True'].mean()
# print(cond)
elif green_proba <= red_proba:
# The day is predicted to be red: the score is the red probability.
text_cond = '๐ŸŸฅ'
operator = '<='
score = red_proba
# How many historical predictions were at least this low?
cond = (res1['Predicted'] <= seq_proba[0])
num_obs = len(res1.loc[cond])
# How often were those rows NOT green (1 - mean of 'True')?
historical_proba = 1 - res1.loc[cond, 'True'].mean()
# print(cond)
score_fmt = f'{score:.1%}'
results = pd.DataFrame(index=[
'PrevClose',
'Confidence Score',
'Success Rate',
f'NumObs {operator} {"" if do_not_play else score_fmt}',
], data = [
f"{data.loc[final_row,'Close']:.2f}",
f'{text_cond} {score:.1%}',
f'{historical_proba:.1%}',
num_obs,
])
results.columns = ['Outputs']
# st.subheader('New Prediction')
int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
# df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
# Percentage change of Close versus PrevClose, shifted back one row so that
# each row carries the change recorded on the row that follows it.
data['ClosePct'] = ((data['Close'] / data['PrevClose']) - 1).shift(-1)
res1 = res1.merge(data['ClosePct'], left_index=True, right_index=True)
# Bucket the historical predictions into the five fixed probability bands.
proba_bands = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
# String aliases replace np.mean/np.sum (passing those callables to agg is
# deprecated in recent pandas) and observed=False is spelled out so empty
# bands keep their row instead of relying on a deprecated implicit default.
df_probas = res1.groupby(proba_bands, observed=False).agg(
    {'True': ['mean', len, 'sum'], 'ClosePct': ['mean']}
)
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen', 'AvgPerf']
df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
len_all = len(res1)
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
len_hi = len(res2_filtered)
df_performance = pd.DataFrame(
index=[
'N',
'ROC AUC',
'Precision',
'Recall'
],
columns = [
'All',
'High Confidence'
],
data = [
[len_all, len_hi],
[roc_auc_score_all, roc_auc_score_hi],
[precision_score_all, precision_score_hi],
[recall_score_all, recall_score_hi]
]
).round(2)
def get_acc(t, p):
    """Return a status glyph grading prediction `p` against outcome `t`.

    Args:
        t: The realised outcome (truthy/falsy flag).
        p: The predicted probability of a True outcome.

    Returns:
        '✅' when a call was made and matched the outcome (p > 0.6 with t
        True, or p <= 0.4 with t False), '❌' when a call was made and
        missed, and '🟨' when p lies in (0.4, 0.6] or `t` equals neither
        True nor False.
    """
    # The glyphs were previously stored as mis-decoded byte sequences; they
    # are restored to the intended characters here.
    # Explicit == True / == False is kept on purpose: it works for numpy
    # booleans and sends values such as None or NaN to the neutral branch.
    if (t == True and p > 0.6) or (t == False and p <= 0.4):  # noqa: E712
        return '✅'
    if (t == False and p > 0.6) or (t == True and p <= 0.4):  # noqa: E712
        return '❌'
    return '🟨'
def get_acc_text(t, p):
    """Return a text grade for prediction `p` against outcome `t`.

    'Correct' when p > 0.6 and t is True, or p <= 0.4 and t is False;
    'Incorrect' for the two opposite pairings; 'No Action' otherwise
    (p in (0.4, 0.6], or `t` equal to neither True nor False).
    """
    # Equality tests rather than truthiness, so values that equal neither
    # True nor False fall through to 'No Action'.
    if t == True:  # noqa: E712
        if p > 0.6:
            return 'Correct'
        if p <= 0.4:
            return 'Incorrect'
    elif t == False:  # noqa: E712
        if p <= 0.4:
            return 'Correct'
        if p > 0.6:
            return 'Incorrect'
    return 'No Action'
# Per-row scorecard: the row's index plus one business day, the raw
# probability, the outcome, both grade columns and a confidence score.
perf_daily = res1.copy()
perf_daily['TargetDate'] = perf_daily.index + BDay(1)
outcome_pairs = list(zip(perf_daily['True'], perf_daily['Predicted']))
perf_daily['Accuracy'] = [
    get_acc(actual, proba) for actual, proba in outcome_pairs
]
perf_daily['AccuracyText'] = [
    get_acc_text(actual, proba) for actual, proba in outcome_pairs
]
# Probabilities at or below 0.4 are flipped to 1 - p; everything else is
# reported unchanged.
perf_daily['ConfidenceScore'] = [
    1 - proba if proba <= 0.4 else proba
    for proba in perf_daily['Predicted']
]
scorecard_columns = [
    'TargetDate',
    'Predicted',
    'True',
    'Accuracy',
    'AccuracyText',
    'ConfidenceScore',
]
perf_daily = perf_daily[scorecard_columns]
def convert_df(df):
    """Serialise `df` to CSV text (index included) and return the string.

    NOTE: no cache decorator is applied here, so the conversion is repeated
    on every call.
    """
    csv_text = df.to_csv()
    return csv_text
csv = convert_df(perf_daily)
check = data.tail(1)

# Expected move derived from the VIX close: Close * (Close_VIX / 100) scaled
# by sqrt(1) / sqrt(252).
base_em = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))

# (row label in df_em, column prefix in `data`, multiple of the base move)
em_bands = (
    ('EM 1X', 'VIX_EM', 1),
    ('EM 1.25X', 'VIX_EM_125', 1.25),
    ('EM 1.5X', 'VIX_EM_15', 1.5),
)

# Each band is computed from a row's own Close and then shifted down one row,
# so a row ends up holding the band derived from the previous row.
for em_label, em_col, em_multiple in em_bands:
    band_em = base_em if em_multiple == 1 else base_em * em_multiple
    data[em_col] = band_em.shift(1)
    data[f'{em_col}_High'] = (data['Close'] + band_em).shift(1)
    data[f'{em_col}_Low'] = (data['Close'] - band_em).shift(1)

# One row per band: latest move and bounds, the share of rows whose Close
# stayed inside the band, and the share whose High/Low went beyond it.
df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
n_rows = len(data)
for em_label, em_col, em_multiple in em_bands:
    closed_inside = data.query(f'Close <= {em_col}_High & Close >= {em_col}_Low')
    traded_outside = data.query(f'High > {em_col}_High | Low < {em_col}_Low')
    df_em.loc[em_label] = [
        data[em_col].iloc[-1].round(2),
        data[f'{em_col}_Low'].iloc[-1].round(2),
        data[f'{em_col}_High'].iloc[-1].round(2),
        f"{len(closed_inside) / n_rows:.1%}",
        f"{len(traded_outside) / n_rows:.1%}",
    ]
# Tab 1: headline prediction, probability-band table and VIX expected move.
tab1.subheader(f'Pred for {curr_date} as of 8AM PST')
tab1.write(results)
tab1.write(df_probas)
tab1.text('VIX EM')
tab1.write(df_em)

# Tab 2: the feature row the prediction was made from.
tab2.subheader('Latest Data for Pred')
tab2.write(new_pred)

# Tab 3: the full historical frame.
tab3.subheader('Historical Data')
tab3.write(df_final)

# Tab 4: aggregate metrics followed by the per-row scorecard.
tab4.subheader('Performance')
tab4.write(df_performance)
tab4.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])

# No download button is rendered at this point; only the export filename
# is set here, for the download button that follows.
fname='performance_for_90m_model.csv'
# Offer the historical performance CSV once a model has actually run.
# `csv` and `fname` appear to be assigned only inside the model branches
# (confirm against the other branches), so a submit with the blank option
# must not reach the button: it would raise NameError on `csv`.
if submitted and option != '':
    st.download_button(
        label="Download Historical Performance",
        data=csv,
        file_name=fname,
    )
    # Warning glyphs restored from their mis-decoded byte sequences.
    st.caption('⚠️ Downloading the CSV will reload the page. ⚠️')