# Hugging Face Space: wnstnb / gamedayspx (status when captured: Sleeping)
# File size: 23,358 Bytes

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, precision_score, recall_score
from pandas.tseries.offsets import BDay

# Set the page title and page icon for the app.
st.set_page_config(
    page_title="Gameday $SPX",
    page_icon="🎮"
)

# Page heading, followed by a standing notice that links to the model's README.
st.title('🎮 Gameday Model for $SPX')
st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation on the model and its features [can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)')
with st.form("choose_model"):
    # Layout: time-slot slider on the left, the two submit buttons on the right.
    col1, col2 = st.columns(2)

    with col1:
        # Each slider stop selects one model variant further down. The slider
        # only offers these four values, so there is no "nothing selected"
        # state to handle (the former `option == ''` check could never fire).
        option = st.select_slider(
            'Slide the scale based on PST, then run.',
            ['06:30', '07:00', '07:30', '08:00']
        )
    with col2:
        submitted = st.form_submit_button('🏃🏽‍♂️ Run',use_container_width=True)
        cleared = st.form_submit_button('🧹 Clear All',use_container_width=True)

    if cleared:
        # Empty Streamlit's data cache.
        st.cache_data.clear()

    if submitted:

        # Pick the model module for the chosen time slot. Every module is used
        # through the same three names below (get_data,
        # walk_forward_validation_seq, seq_predict_proba); only the module, the
        # export filename and the presence of the 30-minute features differ.
        # NOTE(review): the star imports are kept as-is because the full set of
        # names the modules export is not visible from this file.
        if option == '06:30':
            from model_day import *
            fname = 'performance_for_open_model.csv'
            uses_intraday_features = False
        elif option == '07:00':
            from model_30m import *
            fname = 'performance_for_30m_model.csv'
            uses_intraday_features = True
        elif option == '07:30':
            from model_1h import *
            fname = 'performance_for_1h_model.csv'
            uses_intraday_features = True
        elif option == '08:00':
            from model_90m import *
            fname = 'performance_for_90m_model.csv'
            uses_intraday_features = True

        # Feature columns, in the order they are handed to the model. These are
        # assigned after the star import so the import cannot overwrite them.
        feature_cols = [
            'BigNewsDay',
            'Quarter',
            'Perf5Day',
            'Perf5Day_n1',
            'DaysGreen',
            'DaysRed',
        ]
        if uses_intraday_features:
            feature_cols += [
                'CurrentHigh30toClose',
                'CurrentLow30toClose',
                'CurrentClose30toClose',
                'CurrentRange30',
                'GapFill30',
            ]
        feature_cols += [
            'CurrentGap',
            'RangePct',
            'RangePct_n1',
            'RangePct_n2',
            'OHLC4_VIX',
            'OHLC4_VIX_n1',
            'OHLC4_VIX_n2',
        ]

        with st.spinner('Loading data...'):
            data, df_final, final_row = get_data()

        with st.spinner("Training models..."):
            res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)

        with st.spinner("Getting new prediction..."):
            # Turn the latest row into a one-row frame with the features as columns.
            new_pred = pd.DataFrame(data.loc[final_row, feature_cols]).T

            # The prediction is labelled with the business day after the latest row.
            curr_date = (final_row + BDay(1)).strftime('%Y-%m-%d')

            # Every feature is cast to float except the three listed here.
            non_float_dtypes = {'Quarter': int, 'Perf5Day': bool, 'Perf5Day_n1': bool}
            new_pred = new_pred.astype(
                {col: non_float_dtypes.get(col, float) for col in feature_cols}
            )

            seq_proba = seq_predict_proba(new_pred, xgbr, seq2)

        st.success(f"All done for {option}!", icon="✅")

        # The first element of the model output is read as the probability of a
        # green day; red is its complement.
        green_proba = seq_proba[0]
        red_proba = 1 - green_proba
        # Scores in (0.4, 0.6] are flagged as "do not play".
        do_not_play = 0.4 < green_proba <= 0.6
        stdev = 0.01

        # Placeholders so each summary name exists even if no branch below matches.
        score = num_obs = cond = historical_proba = text_cond = operator = None

        if do_not_play:
            # Neutral band: compare against past predictions that fell in the same band.
            text_cond, operator, score = '🟨', '', green_proba
            cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
            matched = res1.loc[cond]
            num_obs = len(matched)
            historical_proba = matched['True'].mean()

        elif green_proba > red_proba:
            # Green call: look at past predictions scored at least this high
            # and how often they were green.
            text_cond, operator, score = '🟩', '>=', green_proba
            cond = res1['Predicted'] >= green_proba
            matched = res1.loc[cond]
            num_obs = len(matched)
            historical_proba = matched['True'].mean()

        elif green_proba <= red_proba:
            # Red call: look at past predictions scored at most this low and
            # how often they were NOT green.
            text_cond, operator, score = '🟥', '<=', red_proba
            cond = res1['Predicted'] <= green_proba
            matched = res1.loc[cond]
            num_obs = len(matched)
            historical_proba = 1 - matched['True'].mean()

        score_fmt = f'{score:.1%}'
        numobs_label = f'NumObs {operator} {"" if do_not_play else score_fmt}'

        # Single-column summary table shown on the prediction tab.
        results = pd.DataFrame(
            index=[
                'PrevClose',
                'Confidence Score',
                'Success Rate',
                numobs_label,
            ],
            data=[
                f"{data.loc[final_row,'Close']:.2f}",
                f'{text_cond} {score_fmt}',
                f'{historical_proba:.1%}',
                num_obs,
            ],
        )

        results.columns = ['Outputs']

        # Labels for the five probability buckets of the breakdown table.
        int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']

        # Close-over-PrevClose return, shifted up one row so each row carries
        # the return of the row that follows it.
        data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1
        data['ClosePct'] = data['ClosePct'].shift(-1)
        res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)

        # Bucket the historical predictions and summarise each bucket.
        # `observed=False` and the string aggregators pin the behaviour pandas
        # has today: all five buckets are kept even when empty, and 'mean'/'sum'
        # use the groupby implementations that np.mean/np.sum were mapped to.
        proba_bucket = pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)
        df_probas = res1.groupby(proba_bucket, observed=False).agg({'True':['mean',len,'sum'],'ClosePct':['mean']})
        df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf']
        df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')

        # Classification metrics over every historical prediction (0.5 cut-off
        # for precision and recall).
        roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
        precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
        recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
        len_all = len(res1)

        # Same metrics restricted to predictions outside the (0.4, 0.6] band.
        res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]

        roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
        precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
        recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
        len_hi = len(res2_filtered)

        df_performance = pd.DataFrame(
            index=[
                'N',
                'ROC AUC',
                'Precision',
                'Recall'
            ],
            columns = [
                'All',
                'High Confidence'
            ],
            data = [
                [len_all, len_hi],
                [roc_auc_score_all, roc_auc_score_hi],
                [precision_score_all, precision_score_hi],
                [recall_score_all, recall_score_hi]
            ]
        ).round(2)

        def get_acc(t, p):
            """Grade one prediction as an emoji.

            ``t`` is the realised outcome and ``p`` the predicted score.
            Returns '✅' when a score <= 0.4 met a False outcome or a score
            > 0.6 met a True outcome, '❌' for the two opposite pairings, and
            '🟨' for everything else (scores in between, or an outcome that is
            equal to neither True nor False).
            """
            # Equality (not truthiness) is deliberate: values equal to neither
            # True nor False must fall through to the neutral result.
            if t == False:
                if p <= 0.4:
                    return '✅'
                if p > 0.6:
                    return '❌'
            elif t == True:
                if p > 0.6:
                    return '✅'
                if p <= 0.4:
                    return '❌'
            return '🟨'
            
        def get_acc_text(t, p):
            """Grade one prediction as text.

            ``t`` is the realised outcome and ``p`` the predicted score.
            Returns 'Correct' when a score <= 0.4 met a False outcome or a
            score > 0.6 met a True outcome, 'Incorrect' for the two opposite
            pairings, and 'No Action' for everything else.
            """
            # Equality (not truthiness) is deliberate: values equal to neither
            # True nor False must fall through to 'No Action'.
            if t == False:
                if p <= 0.4:
                    return 'Correct'
                if p > 0.6:
                    return 'Incorrect'
            elif t == True:
                if p > 0.6:
                    return 'Correct'
                if p <= 0.4:
                    return 'Incorrect'
            return 'No Action'

        # Per-day performance table, built on a copy so res1 is left untouched.
        perf_daily = res1.copy()
        # Each row is labelled with the business day after its index date.
        perf_daily['TargetDate'] = perf_daily.index + BDay(1)

        # Grade every (outcome, score) pair twice: as an emoji and as text.
        outcome_pairs = list(zip(perf_daily['True'], perf_daily['Predicted']))
        perf_daily['Accuracy'] = [get_acc(actual, proba) for actual, proba in outcome_pairs]
        perf_daily['AccuracyText'] = [get_acc_text(actual, proba) for actual, proba in outcome_pairs]

        # Scores at or below 0.4 are mirrored (1 - score); all others are kept as-is.
        perf_daily['ConfidenceScore'] = [
            1 - proba if proba <= 0.4 else proba
            for proba in perf_daily['Predicted']
        ]

        export_cols = ['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']
        perf_daily = perf_daily[export_cols]

        def convert_df(df):
            """Return ``df`` serialised as CSV text via ``DataFrame.to_csv()``.

            Nothing here caches the result: the conversion runs on every call.
            """
            csv_text = df.to_csv()
            return csv_text

        csv = convert_df(perf_daily)

        tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical", "📊 Performance"])

        # Base band width: Close * (Close_VIX / 100) * sqrt(1) / sqrt(252).
        # Presumably the one-day VIX-implied expected move ("EM") - confirm.
        base_em = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))

        # Tomorrow's EM and today's EM, read from the unshifted series.
        fwd_em, curr_em = base_em.iloc[-1], base_em.iloc[-2]

        # One row per band multiple. The columns written to `data` are shifted
        # down one row, so every row is compared with the band derived from the
        # previous row's close.
        df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
        band_specs = (
            ('EM 1X', '', 1.0),
            ('EM 1.25X', '_125', 1.25),
            ('EM 1.5X', '_15', 1.5),
        )
        for row_label, suffix, multiple in band_specs:
            em_col = f'VIX_EM{suffix}'
            high_col = f'{em_col}_High'
            low_col = f'{em_col}_Low'

            scaled_em = base_em * multiple
            data[em_col] = scaled_em.shift(1)
            data[high_col] = (data['Close'] + scaled_em).shift(1)
            data[low_col] = (data['Close'] - scaled_em).shift(1)

            # Share of all rows that closed inside the band, and share whose
            # High/Low went beyond it.
            n_rows = len(data)
            n_within = len(data.query(f'Close <= {high_col} & Close >= {low_col}'))
            n_tested = len(data.query(f'High > {high_col} | Low < {low_col}'))

            df_em.loc[row_label] = [
                data[em_col].iloc[-1].round(2),
                data[low_col].iloc[-1].round(2),
                data[high_col].iloc[-1].round(2),
                f"{n_within / n_rows:.1%}",
                f"{n_tested / n_rows:.1%}",
            ]
        
        # Render the outputs, one tab per view.
        with tab1:
            # Prediction summary, probability-bucket table and the EM bands.
            st.subheader(f'{option} on {curr_date}')
            st.write(results)
            st.write(df_probas)
            st.text(f'VIX EM ({curr_em:.2f} / {fwd_em:.2f})')
            st.write(df_em)
        with tab2:
            # The single feature row that was passed to the model.
            st.subheader('Latest Data for Pred')
            st.write(new_pred)
        with tab3:
            st.subheader('Historical Data')
            st.write(df_final)
        with tab4:
            st.subheader('Performance')
            st.write(df_performance)
            st.text('Performance last 10 days (download for all)')
            st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']].iloc[-10:])
            # The download button for the full history is rendered at the end
            # of the script, outside the form.

# Rendered outside the form and only after a run: `csv` and `fname` are
# assigned solely inside the `if submitted:` branch of the form above.
if submitted:
    st.download_button(
        label="Download Historical Performance",
        data=csv,
        file_name=fname,
        # Declare the payload as CSV instead of relying on the default MIME type.
        mime='text/csv',
    )
    st.caption('⚠️ Downloading the CSV will reload the page. ⚠️')