Spaces:

Multichem
/

MLB_season_long

Running

File size: 19,046 Bytes

a71bb4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78dd603
a71bb4c
 
50b88d7
 
 
a71bb4c
 
78dd603
d1f3fde
28488bc
78dd603
a71bb4c
78dd603
 
a71bb4c
28488bc
78dd603
a71bb4c
fbe9d78
37e7574
fbe9d78
 
 
 
a71bb4c
 
 
 
fbe9d78
50b88d7
a71bb4c
628478b
a71bb4c
 
 
 
fbe9d78
50b88d7
fbe9d78
0e9c889
a138059
0e9c889
87c8996
 
a138059
fbe9d78
 
 
 
 
 
 
 
 
 
 
 
 
50b88d7
41070c3
50b88d7
 
 
 
 
 
 
9a68921
8c40a43
50b88d7
8c40a43
86aaff4
f307310
a71bb4c
 
78dd603
9a68921
78dd603
a71bb4c
78dd603
a71bb4c
 
fbe9d78
 
a71bb4c
fbe9d78
50b88d7
41070c3
50b88d7
 
 
 
 
 
 
fbe9d78
8c40a43
50b88d7
8c40a43
86aaff4
f307310
78dd603
a71bb4c
78dd603
9a68921
78dd603
a71bb4c
78dd603
a71bb4c
 
fbe9d78
 
a71bb4c
fbe9d78
50b88d7
a71bb4c
 
 
 
 
 
8a32f7c
78dd603
0e29400
a71bb4c
 
 
 
41070c3
dd69ac1
a71bb4c
 
 
 
a18a6e0
78dd603
ecbdea9
a18a6e0
78dd603
fef14ec
a18a6e0
78dd603
fef14ec
a71bb4c
a06639c
dd69ac1
7bdbeb6
c206262
602269d
a71bb4c
 
a06639c
 
 
 
 
3eed879
 
a06639c
c206262
a06639c
3eed879
a06639c
 
a71bb4c
 
 
a06639c
a71bb4c
3afb181
 
 
 
a71bb4c
3afb181
 
 
 
78dd603
 
 
3afb181
 
 
a71bb4c
 
3afb181
a71bb4c
78dd603
3eed879
2e4a606
a06639c
cf14335
 
50b88d7
78dd603
fbe9d78
 
a71bb4c
fbe9d78
50b88d7
a71bb4c
 
 
 
 
 
e7846f5
fbe9d78
78dd603
0e29400
a71bb4c
78dd603
a71bb4c
78dd603
41070c3
dd69ac1
78dd603
 
 
 
e7846f5
 
a06639c
e7846f5
 
ecbdea9
e7846f5
 
ecbdea9
e7846f5
 
a06639c
e7846f5
 
a06639c
78dd603
a06639c
dd69ac1
a06639c
795eb65
602269d
78dd603
 
a06639c
 
 
 
 
3eed879
 
a06639c
09558fc
a06639c
3eed879
a06639c
 
78dd603
 
 
a06639c
78dd603
3afb181
 
 
 
78dd603
3afb181
 
 
 
78dd603
 
 
3afb181
 
 
78dd603
 
3afb181
78dd603
 
3eed879
2e4a606
3eed879
a06639c
 
50b88d7

# Page configuration is the first Streamlit call made by this script.
import streamlit as st
st.set_page_config(layout="wide")

# Delete every public (non-underscore) name bound in the module namespace so
# far -- e.g. `st` from the import above -- which is why streamlit is imported
# again further down.
# NOTE(review): presumably intended to release objects between reruns; confirm
# it is still needed before relying on or removing it.
for name in dir():
    if not name.startswith('_'):
        del globals()[name]

import numpy as np
import pandas as pd
import streamlit as st
import gspread
import plotly.express as px
import random
import gc

@st.cache_resource
def init_conn():
    """Create the gspread client used to read the projection workbook.

    The service-account credentials are read from Streamlit secrets (section
    ``gcp_service_account``) rather than being embedded in the source file, so
    the private key never lives in the repository. The section must contain
    the fields of a Google service-account JSON key file (``type``,
    ``project_id``, ``private_key_id``, ``private_key``, ``client_email``,
    ``client_id``, ``auth_uri``, ``token_uri``,
    ``auth_provider_x509_cert_url``, ``client_x509_cert_url``).

    The result is cached with ``st.cache_resource`` so one client is shared
    by all reruns and sessions.

    Returns:
        The authenticated gspread client.
    """
    # NOTE: a key that was previously committed in this file must be treated
    # as compromised and rotated in the Google Cloud console.
    credentials = dict(st.secrets["gcp_service_account"])

    # service_account_from_dict applies gspread's default scopes (Sheets +
    # Drive), the same two scopes the old, unused `scope` list spelled out.
    return gspread.service_account_from_dict(credentials)

# Shared gspread client, built once by the cached init_conn().
gcservice_account = init_conn()

# URL of the Google Sheets workbook that init_baselines() opens.
master_hold = 'https://docs.google.com/spreadsheets/d/1D526UlXmrz-8qxVcUKrA-u7f6FftUiBufxDnzQv980k/edit#gid=791804525'

# Styler format spec for the simulation tables: each finish-rate column is
# rendered as a percentage with two decimals.
sim_format = dict.fromkeys(('Top_finish', 'Top_5_finish', 'Top_10_finish'), '{:.2%}')

@st.cache_data(ttl=600)
def init_baselines():
    """Load the pitcher, hitter and team-win tables from the Google Sheet.

    The result is cached with ``st.cache_data`` for 600 seconds. Using the
    data cache (instead of the resource cache) means that every caller gets
    its own copy of the DataFrames and that the cache is invalidated by
    ``st.cache_data.clear()`` as well as by ``init_baselines.clear()``.

    Returns:
        tuple: ``(pitcher_proj, hitter_proj, wins_proj)`` DataFrames.
        For the 'Pitcher_Proj' and 'Hitter_Proj' sheets, blank cells are
        turned into NaN and any row containing a NaN is dropped. The
        'Display' sheet only has rows with NaN dropped (blank strings are
        kept as they are).
    """
    # Open the workbook once and reuse it for all three worksheets.
    workbook = gcservice_account.open_by_url(master_hold)

    pitcher_raw = pd.DataFrame(workbook.worksheet('Pitcher_Proj').get_all_records())
    pitcher_proj = pitcher_raw.replace("", np.nan).dropna()

    hitter_raw = pd.DataFrame(workbook.worksheet('Hitter_Proj').get_all_records())
    hitter_proj = hitter_raw.replace("", np.nan).dropna()

    wins_raw = pd.DataFrame(workbook.worksheet('Display').get_all_records())
    wins_proj = wins_raw.dropna()

    return pitcher_proj, hitter_proj, wins_proj

def convert_df_to_csv(df):
    """Serialize *df* to CSV text (index column included) as UTF-8 bytes."""
    csv_text = df.to_csv()
    return bytes(csv_text, 'utf-8')

# Load the (cached) projection tables for this script run.
pitcher_proj, hitter_proj, wins_proj = init_baselines()

# Team choices for the multiselect widgets. The pitcher table carries both a
# 'Name' and a 'Team' column, so a team value can repeat; keep the first
# occurrence of each so every team is offered only once.
total_teams = pitcher_proj['Team'].drop_duplicates().tolist()

# One tab per view; the `with tabN:` sections below fill them in this order.
tab1, tab2, tab3, tab4, tab5 = st.tabs(["Team Win Projections", "Pitcher Projections", "Hitter Projections", "Pitcher Simulations", "Hitter Simulations"])

with tab1:
    # --- Team win projections -------------------------------------------
    if st.button("Reset Data", key='reset1'):
        # Clear the loader's own cache: this works whichever Streamlit cache
        # decorator init_baselines uses, whereas st.cache_data.clear() does
        # not touch st.cache_resource entries.
        init_baselines.clear()
        pitcher_proj, hitter_proj, wins_proj = init_baselines()
        total_teams = pitcher_proj['Team'].drop_duplicates().tolist()

    raw_frame = wins_proj.copy()

    # The on-screen table and the CSV export show the same columns in the
    # same order (best projected win total first).
    team_columns = ['Team', '2B', 'HR', 'SB', 'P_SO', 'P_H', 'P_R', 'P_HR', 'P_BB', 'LY Added', 'Added', 'LY Adj Wins', 'Adj Wins', 'Vegas', 'Proj wins', 'Diff']
    disp_frame = raw_frame[team_columns].sort_values(by='Proj wins', ascending=False)
    export_frame_team = disp_frame

    st.dataframe(disp_frame.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(precision=2), height = 1000, use_container_width = True)

    st.download_button(
        label="Export Team Win Projections",
        data=convert_df_to_csv(export_frame_team),
        file_name='MLB_team_win_export.csv',
        mime='text/csv',
        key='team_win_export',
    )

with tab2:
    # --- Pitcher projections --------------------------------------------
    if st.button("Reset Data", key='reset2'):
        # Clear the loader's own cache: this works whichever Streamlit cache
        # decorator init_baselines uses, whereas st.cache_data.clear() does
        # not touch st.cache_resource entries.
        init_baselines.clear()
        pitcher_proj, hitter_proj, wins_proj = init_baselines()
        total_teams = pitcher_proj['Team'].drop_duplicates().tolist()

    raw_frame = pitcher_proj.copy()
    split_var1 = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key='split_var1')
    if split_var1 == 'Specific Teams':
        # dict.fromkeys drops repeated team values while keeping their order,
        # so each team is listed once in the widget.
        team_var1 = st.multiselect('Which teams would you like to include in the tables?', options = list(dict.fromkeys(total_teams)), key='team_var1')
    else:
        team_var1 = total_teams

    working_data = raw_frame[raw_frame['Team'].isin(team_var1)]

    # The export always contains every team and the extra rate columns; only
    # the on-screen table follows the team filter.
    export_frame_sp = raw_frame[['Name', 'Team', 'TBF', 'Ceiling_var', 'True_AVG', 'Hits', 'Singles%', 'Singles', 'Doubles%', 'Doubles', 'xHR%', 'Homeruns', 'Strikeout%', 'Strikeouts',
                              'Walk%', 'Walks', 'Runs%', 'Runs', 'ERA', 'Wins', 'Quality_starts', 'ADP', 'UD_fpts', 'DK_fpts']]
    disp_frame_sp = working_data[['Name', 'Team', 'TBF', 'True_AVG', 'Hits', 'Singles', 'Doubles', 'Homeruns', 'Strikeouts',
                            'Walks', 'Runs', 'ERA', 'Wins', 'Quality_starts', 'ADP', 'UD_fpts', 'DK_fpts']]
    disp_frame_sp = disp_frame_sp.sort_values(by='UD_fpts', ascending=False)

    # Reversed colour map first, then the regular one on the listed subset
    # (TBF, Strikeouts, Wins, Quality_starts and the fantasy-point columns).
    st.dataframe(disp_frame_sp.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn_r').background_gradient(cmap='RdYlGn', subset=['TBF', 'Strikeouts', 'Wins', 'Quality_starts', 'UD_fpts', 'DK_fpts']).format(precision=2), height = 1000, use_container_width = True)

    st.download_button(
        label="Export Pitcher Projections",
        data=convert_df_to_csv(export_frame_sp),
        file_name='MLB_pitcher_proj_export.csv',
        mime='text/csv',
        key='pitcher_proj_export',
    )

with tab3:
    # --- Hitter projections ---------------------------------------------
    if st.button("Reset Data", key='reset3'):
        # Clear the loader's own cache: this works whichever Streamlit cache
        # decorator init_baselines uses, whereas st.cache_data.clear() does
        # not touch st.cache_resource entries.
        init_baselines.clear()
        pitcher_proj, hitter_proj, wins_proj = init_baselines()
        total_teams = pitcher_proj['Team'].drop_duplicates().tolist()

    raw_frame = hitter_proj.copy()
    split_var2 = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key='split_var2')
    if split_var2 == 'Specific Teams':
        # dict.fromkeys drops repeated team values while keeping their order,
        # so each team is listed once in the widget.
        team_var2 = st.multiselect('Which teams would you like to include in the tables?', options = list(dict.fromkeys(total_teams)), key='team_var2')
    else:
        team_var2 = total_teams

    working_data = raw_frame[raw_frame['Team'].isin(team_var2)]

    # The export always contains every team and the extra rate columns; only
    # the on-screen table follows the team filter.
    export_frame_h = raw_frame[['Name', 'Team', 'PA', 'Ceiling_var', 'Walk%', 'Walks', 'xHits', 'Singles%', 'Singles', 'Doubles%', 'Doubles',
                              'xHR%', 'Homeruns', 'Runs%', 'Runs', 'RBI%', 'RBI', 'Steal%', 'Stolen_bases', 'ADP', 'UD_fpts', 'DK_fpts']]
    disp_frame_h = working_data[['Name', 'Team', 'PA', 'Walks', 'xHits', 'Singles', 'Doubles',
                            'Homeruns', 'Runs', 'RBI', 'Stolen_bases', 'ADP', 'UD_fpts', 'DK_fpts']]
    disp_frame_h = disp_frame_h.sort_values(by='UD_fpts', ascending=False)

    # Regular colour map everywhere, reversed for ADP.
    st.dataframe(disp_frame_h.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').background_gradient(cmap='RdYlGn_r', subset=['ADP']).format(precision=2), height = 1000, use_container_width = True)

    st.download_button(
        label="Export Hitter Projections",
        data=convert_df_to_csv(export_frame_h),
        file_name='MLB_hitter_proj_export.csv',
        mime='text/csv',
        key='hitter_proj_export',
    )
    
def _simulate_stat_outcomes(projections, stat_col, stat_cap, cap_divisor, loc_factor, std_divisor, total_sims=5000):
    """Simulate season totals for one stat and summarise each player's outcomes.

    For every player the projected value of ``stat_col`` is the median. Each
    simulation draws from a Gumbel distribution with location
    ``median * loc_factor`` and scale ``median / std_divisor``; draws above
    the player's ceiling are replaced by the ceiling. The ceiling is
    ``median + median * Ceiling_var``, unless that exceeds ``stat_cap``, in
    which case it is ``stat_cap + median / cap_divisor``.

    Args:
        projections: DataFrame with 'Name', 'Ceiling_var' and ``stat_col``
            columns. It is not modified.
        stat_col: Name of the projection column to simulate.
        stat_cap: Threshold above which the capped ceiling formula is used.
        cap_divisor: Divisor applied to the median in the capped ceiling.
        loc_factor: Multiplier applied to the median for the Gumbel location.
        std_divisor: Divisor applied to the median for the Gumbel scale.
        total_sims: Number of simulated seasons.

    Returns:
        DataFrame sorted by projection (descending) with columns 'Player',
        '10%', 'Projection', '90%', 'Top_finish', 'Top_5_finish' and
        'Top_10_finish'. The finish columns are the share of simulations in
        which the player's rank was exactly 1, at most 5 and at most 10.
    """
    frame = projections.replace("", 0).rename(columns={"Name": "Player"})
    frame = frame.assign(Median=frame[stat_col]).sort_values(by='Median', ascending=False)

    median = frame['Median'].to_numpy(dtype=float)
    uncapped = median + (median * frame['Ceiling_var'].to_numpy(dtype=float))
    ceiling = np.where(uncapped > stat_cap, stat_cap + (median / cap_divisor), uncapped)
    std = median / std_divisor

    # One (players x simulations) draw instead of inserting a DataFrame
    # column per simulation.
    draws = np.random.gumbel(
        (median * loc_factor)[:, None],
        std[:, None],
        size=(len(frame), total_sims),
    )
    sims = pd.DataFrame(np.where(draws <= ceiling[:, None], draws, ceiling[:, None]))

    # Rank the players within each simulation (1 = highest simulated total).
    ranks = sims.rank(axis=0, ascending=False).to_numpy()

    # The finish rates are computed from the rank matrix only. Counting over
    # a frame that already holds derived columns (average rank, earlier
    # finish rates) would add those columns to the counts and inflate the
    # top-5 / top-10 rates.
    return pd.DataFrame({
        'Player': frame['Player'].to_numpy(),
        '10%': sims.quantile(0.1, axis=1).to_numpy(),
        'Projection': frame['Median'].to_numpy(),
        '90%': sims.quantile(0.9, axis=1).to_numpy(),
        'Top_finish': (ranks == 1).sum(axis=1) / total_sims,
        'Top_5_finish': (ranks <= 5).sum(axis=1) / total_sims,
        'Top_10_finish': (ranks <= 10).sum(axis=1) / total_sims,
    })


# Selectbox label -> (projection column, stat cap) for each simulation tab.
_PITCHER_SIM_STATS = {
    'Strikeouts': ('Strikeouts', 300),
    'Wins': ('Wins', 25),
    'Quality_starts': ('Quality_starts', 30),
}
_HITTER_SIM_STATS = {
    'Hits': ('xHits', 250),
    'Doubles': ('Doubles', 65),
    'Home Runs': ('Homeruns', 75),
    'RBI': ('RBI', 150),
    'Stolen Bases': ('Stolen_bases', 80),
}

with tab4:
    # --- Pitcher simulations --------------------------------------------
    if st.button("Reset Data", key='reset4'):
        # Clear the loader's own cache: this works whichever Streamlit cache
        # decorator init_baselines uses, whereas st.cache_data.clear() does
        # not touch st.cache_resource entries.
        init_baselines.clear()
        pitcher_proj, hitter_proj, wins_proj = init_baselines()
        total_teams = pitcher_proj['Team'].drop_duplicates().tolist()
    col1, col2 = st.columns([1, 5])

    with col2:
        # Placeholder in the wide column; the result table is written here.
        df_hold_container = st.empty()

    with col1:
        prop_type_var_sp = st.selectbox('Select type of prop to simulate', options = list(_PITCHER_SIM_STATS), key='prop_type_var_sp')

        if st.button('Simulate Stat', key='sim_sp'):
            stat_col, stat_cap = _PITCHER_SIM_STATS[prop_type_var_sp]
            final_Proj = _simulate_stat_outcomes(
                pitcher_proj, stat_col, stat_cap,
                cap_divisor=10, loc_factor=.75, std_divisor=3,
            )

            with df_hold_container.container():
                st.dataframe(final_Proj.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(sim_format, precision=2), use_container_width = True)

with tab5:
    # --- Hitter simulations ---------------------------------------------
    if st.button("Reset Data", key='reset5'):
        # Same cache reset as in the pitcher simulation tab.
        init_baselines.clear()
        pitcher_proj, hitter_proj, wins_proj = init_baselines()
        total_teams = pitcher_proj['Team'].drop_duplicates().tolist()
    col1, col2 = st.columns([1, 5])

    with col2:
        # Placeholder in the wide column; the result table is written here.
        df_hold_container = st.empty()

    with col1:
        prop_type_var_h = st.selectbox('Select type of prop to simulate', options = list(_HITTER_SIM_STATS), key='prop_type_var_h')

        if st.button('Simulate Stat', key='sim_h'):
            stat_col, stat_cap = _HITTER_SIM_STATS[prop_type_var_h]
            final_Proj = _simulate_stat_outcomes(
                hitter_proj, stat_col, stat_cap,
                cap_divisor=20, loc_factor=.5, std_divisor=2,
            )

            with df_hold_container.container():
                st.dataframe(final_Proj.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(sim_format, precision=2), use_container_width = True)