MLB_DFS_ROO / app.py
James McCool
Add error handling for player ROO data assignment in app.py to ensure robustness. If dropping columns fails, fallback to the original data, enhancing stability in data processing.
f5c8a2c
raw
history blame
25.9 kB
import streamlit as st
import numpy as np
import pandas as pd
import gspread
import pymongo
# Use the wide page layout for the whole app.
st.set_page_config(layout="wide")
@st.cache_resource
def init_conn():
    """Create the MongoDB client and return the two database handles.

    The connection string is read from the ``mongo_uri`` Streamlit secret.

    Returns:
        tuple: ``(MLB_Database handle, MLB_DFS handle)``, in that order.
    """
    mongo_client = pymongo.MongoClient(
        st.secrets['mongo_uri'],
        retryWrites=True,
        serverSelectionTimeoutMS=500000,
    )
    return mongo_client["MLB_Database"], mongo_client["MLB_DFS"]
# Database handles returned by init_conn(): db is "MLB_Database", db2 is "MLB_DFS".
db, db2 = init_conn()
# Styler format strings: each listed column is rendered as a two-decimal percentage.
game_format = {
    'Win Percentage': '{:.2%}',
    'First Inning Lead Percentage': '{:.2%}',
    'Fifth Inning Lead Percentage': '{:.2%}',
    '8+ runs': '{:.2%}',
    'DK LevX': '{:.2%}',
    'FD LevX': '{:.2%}',
}

player_roo_format = {
    'Top_finish': '{:.2%}',
    'Top_5_finish': '{:.2%}',
    'Top_10_finish': '{:.2%}',
    '20+%': '{:.2%}',
    '2x%': '{:.2%}',
    '3x%': '{:.2%}',
    '4x%': '{:.2%}',
}

# Column labels applied to the lineup arrays, one list per site.
dk_columns = [
    'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'FLEX6',
    'salary', 'proj', 'Own',
]
fd_columns = [
    'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'FLEX6',
    'salary', 'proj', 'Own',
]
# Inject custom CSS for the tab bar (.stTabs rules) and for select boxes;
# unsafe_allow_html=True is needed so the <style> element is emitted as HTML.
st.markdown("""
<style>
/* Tab styling */
.stTabs [data-baseweb="tab-list"] {
gap: 8px;
padding: 4px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
white-space: pre-wrap;
background-color: #DAA520;
color: white;
border-radius: 10px;
gap: 1px;
padding: 10px 20px;
font-weight: bold;
transition: all 0.3s ease;
}
.stTabs [aria-selected="true"] {
background-color: #DAA520;
border: 3px solid #FFD700;
color: white;
}
.stTabs [data-baseweb="tab"]:hover {
background-color: #FFD700;
cursor: pointer;
}
div[data-baseweb="select"] > div {
background-color:rgb(134, 255, 164);
}
</style>""", unsafe_allow_html=True)
def _load_collection_frame(collection_name):
    """Return all documents of an ``MLB_Database`` collection as a DataFrame.

    Mongo's ``_id`` column is dropped from the result.
    """
    documents = pd.DataFrame(list(db[collection_name].find()))
    return documents.drop(columns=['_id'])


# st.cache_data (rather than st.cache_resource) is used because the return
# value is plain data: every caller gets its own copy instead of sharing one
# mutable DataFrame across sessions, and st.cache_data.clear() (called by the
# "Load/Reset Data" button) actually invalidates it.
@st.cache_data(ttl = 60)
def init_baselines():
    """Load the baseline tables the dashboard is built from.

    Returns:
        tuple: ``(roo_data, sd_roo_data, scoring_percentages)`` where

        * ``roo_data`` comes from ``Player_Range_Of_Outcomes``,
        * ``sd_roo_data`` comes from ``Player_SD_Range_Of_Outcomes``,
        * ``scoring_percentages`` comes from ``Scoring_Percentages``, reduced
          to the displayed columns.

        ``Salary`` is cast to ``int`` in both ROO frames. ``8+ runs`` and
        ``Win Percentage`` have any ``%`` stripped and are divided by 100.

    Raises:
        KeyError: If a collection is empty or lacks an expected column.
    """
    roo_data = _load_collection_frame("Player_Range_Of_Outcomes")
    roo_data['Salary'] = roo_data['Salary'].astype(int)

    sd_roo_data = _load_collection_frame("Player_SD_Range_Of_Outcomes")
    sd_roo_data['Salary'] = sd_roo_data['Salary'].astype(int)

    team_frame = _load_collection_frame("Scoring_Percentages")
    # .copy() so the column assignments below write to an independent frame
    # and do not trigger pandas' chained-assignment warning.
    scoring_percentages = team_frame[['Names', 'Avg First Inning', 'First Inning Lead Percentage', 'Avg Fifth Inning', 'Fifth Inning Lead Percentage', 'Avg Score', '8+ runs', 'Win Percentage']].copy()
    for percent_column in ('8+ runs', 'Win Percentage'):
        scoring_percentages[percent_column] = scoring_percentages[percent_column].replace('%', '', regex=True).astype(float) / 100

    return roo_data, sd_roo_data, scoring_percentages
# For each DraftKings slate type: the seed-frame collection per slate and the
# columns (in output order) taken from it.
_DK_SEED_SOURCES = {
    'Regular': (
        {
            'Main': 'DK_MLB_seed_frame',
            'Secondary': 'DK_MLB_Secondary_seed_frame',
            'Auxiliary': 'DK_MLB_Turbo_seed_frame',
        },
        ['SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3', 'salary', 'proj', 'Own'],
    ),
    'Showdown': (
        {
            'Main': 'DK_MLB_SD1_seed_frame',
            'Secondary': 'DK_MLB_SD2_seed_frame',
            'Auxiliary': 'DK_MLB_SD3_seed_frame',
        },
        ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'salary', 'proj', 'Own'],
    ),
}


@st.cache_data(ttl = 60)
def init_DK_lineups(type_var, slate_var):
    """Load up to 10,000 DraftKings seed lineups as a 2-D NumPy array.

    Args:
        type_var: ``'Regular'`` or ``'Showdown'``.
        slate_var: ``'Main'``, ``'Secondary'`` or ``'Auxiliary'``.

    Returns:
        numpy.ndarray: One row per lineup. Columns are the roster slots for
        the slate type followed by ``salary``, ``proj`` and ``Own``.

    Raises:
        ValueError: If the type/slate combination is not one listed above.
            Previously this fell through to an ``UnboundLocalError``.
        KeyError: If the collection is empty or lacks an expected column.
    """
    try:
        collection_names, lineup_columns = _DK_SEED_SOURCES[type_var]
        collection_name = collection_names[slate_var]
    except KeyError:
        raise ValueError(
            f"Unsupported DraftKings lineup selection: type_var={type_var!r}, slate_var={slate_var!r}"
        ) from None

    cursor = db2[collection_name].find().limit(10000)
    raw_display = pd.DataFrame(list(cursor))
    DK_seed = raw_display[lineup_columns].to_numpy()
    return DK_seed
# For each FanDuel slate type: the seed-frame collection per slate and the
# columns (in output order) taken from it.
_FD_SEED_SOURCES = {
    'Regular': (
        {
            'Main': 'FD_MLB_seed_frame',
            'Secondary': 'FD_MLB_Secondary_seed_frame',
            'Auxiliary': 'FD_MLB_Turbo_seed_frame',
        },
        ['P', 'C_1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3', 'UTIL', 'salary', 'proj', 'Own'],
    ),
    'Showdown': (
        {
            'Main': 'FD_MLB_SD1_seed_frame',
            'Secondary': 'FD_MLB_SD2_seed_frame',
            'Auxiliary': 'FD_MLB_SD3_seed_frame',
        },
        ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'salary', 'proj', 'Own'],
    ),
}


@st.cache_data(ttl = 60)
def init_FD_lineups(type_var,slate_var):
    """Load up to 10,000 FanDuel seed lineups as a 2-D NumPy array.

    Args:
        type_var: ``'Regular'`` or ``'Showdown'``.
        slate_var: ``'Main'``, ``'Secondary'`` or ``'Auxiliary'``.

    Returns:
        numpy.ndarray: One row per lineup. Columns are the roster slots for
        the slate type followed by ``salary``, ``proj`` and ``Own``.

    Raises:
        ValueError: If the type/slate combination is not one listed above.
            Previously this fell through to an ``UnboundLocalError``.
        KeyError: If the collection is empty or lacks an expected column.
    """
    try:
        collection_names, lineup_columns = _FD_SEED_SOURCES[type_var]
        collection_name = collection_names[slate_var]
    except KeyError:
        raise ValueError(
            f"Unsupported FanDuel lineup selection: type_var={type_var!r}, slate_var={slate_var!r}"
        ) from None

    cursor = db2[collection_name].find().limit(10000)
    raw_display = pd.DataFrame(list(cursor))
    FD_seed = raw_display[lineup_columns].to_numpy()
    return FD_seed
@st.cache_data
def convert_df_to_csv(df):
    """Serialize *df* to CSV text (index included) and return it as UTF-8 bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
@st.cache_data
def convert_df(array):
    """Label *array* with the module-level ``column_names`` and return it as CSV bytes.

    ``column_names`` is read from the module namespace when the function
    runs, so it must have been assigned before the first call.
    """
    labelled_frame = pd.DataFrame(array, columns=column_names)
    csv_text = labelled_frame.to_csv()
    return csv_text.encode('utf-8')
# Header row: a narrow column for the reset button, a wide one for the
# view/site selection form.
col1, col2 = st.columns([1, 9])
with col1:
    if st.button("Load/Reset Data", key='reset'):
        st.cache_data.clear()
        # Also drop init_baselines' own cache explicitly so the reload below
        # always hits the database, whichever Streamlit cache backs it.
        init_baselines.clear()
        roo_data, sd_roo_data, scoring_percentages = init_baselines()
        hold_display = roo_data
        dk_lineups = init_DK_lineups('Regular', 'Main')
        fd_lineups = init_FD_lineups('Regular', 'Main')
        # Iterate over a snapshot of the keys: entries are deleted inside the loop.
        for key in list(st.session_state.keys()):
            del st.session_state[key]
with col2:
    with st.form("Data Load"):
        # Distinct names so the outer col1/col2 are not rebound while in use.
        view_col, site_col = st.columns([3, 3])
        with view_col:
            view_var = st.selectbox("Select view", ["Simple", "Advanced"], key='view_var')
        with site_col:
            site_var = st.selectbox("What site do you want to view?", ('Draftkings', 'Fanduel'), key='site_var')
        submit_button = st.form_submit_button("Submit")
# Top-level tabs, plus the baseline frames shared by all of them.
tab1, tab2, tab3 = st.tabs(["Scoring Percentages", "Player ROO", "Optimals"])
roo_data, sd_roo_data, scoring_percentages = init_baselines()
hold_display = roo_data

with tab1:
    st.header("Scoring Percentages")
    with st.expander("Info and Filters"):
        with st.container():
            slate_var1 = st.radio("Which data are you loading?", ('Main Slate', 'Secondary Slate', 'All Games'), key='slate_var1')
            own_var1 = st.radio("How would you like to display team ownership?", ('Sum', 'Average'), key='own_var1')
    st.title("Scoring Percentages")

    # The Simple view trims the table to four columns; Advanced shows it as loaded.
    if view_var == "Simple":
        scoring_percentages = scoring_percentages[['Names', 'Avg Score', '8+ runs', 'Win Percentage']]
    if view_var in ("Simple", "Advanced"):
        styled_scoring = scoring_percentages.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(game_format, precision=2)
        st.dataframe(styled_scoring, height=750, use_container_width = True, hide_index=True)
with tab2:
    st.header("Player ROO")
    with st.expander("Info and Filters"):
        with st.container():
            slate_type_var2 = st.radio("Which slate type are you loading?", ('Regular', 'Showdown'), key='slate_type_var2')
            slate_var2 = st.radio("Which slate data are you loading?", ('Main', 'Secondary', 'Auxiliary'), key='slate_var2')
            pos_var2 = st.radio("Which position group would you like to view?", ('All', 'Pitchers', 'Hitters'), key='pos_var2')
            team_var2 = st.selectbox("Which team would you like to view?", ['All', 'Specific'], key='team_var2')
            if team_var2 == 'Specific':
                team_select2 = st.multiselect("Which team would you like to view?", roo_data['Team'].unique(), key='team_select2')
            else:
                team_select2 = None

    if slate_type_var2 == 'Regular':
        # Keep only the rows of the selected site. The previous code assigned
        # the site name to every row, so rows of both sites were displayed;
        # the Optimals tab filters the same frame on this column.
        player_roo_raw = roo_data[roo_data['Site'] == site_var].copy()

        # Pitchers are coded 'SP' on Draftkings and 'P' on Fanduel.
        pitcher_position = {'Draftkings': 'SP', 'Fanduel': 'P'}.get(site_var)
        if pitcher_position is not None:
            is_pitcher = player_roo_raw['Position'] == pitcher_position
            if pos_var2 == 'Pitchers':
                player_roo_raw = player_roo_raw[is_pitcher]
            elif pos_var2 == 'Hitters':
                player_roo_raw = player_roo_raw[~is_pitcher]

        # The radio options are exactly the values stored in the Slate column.
        player_roo_raw = player_roo_raw[player_roo_raw['Slate'] == slate_var2]
    elif slate_type_var2 == 'Showdown':
        player_roo_raw = sd_roo_data.copy()
        # NOTE(review): the showdown frame is tagged with the selected site,
        # not filtered by it. Confirm whether this collection carries its own
        # per-site rows; if it does, this should filter like the Regular branch.
        if site_var in ('Draftkings', 'Fanduel'):
            player_roo_raw['Site'] = site_var

    if team_select2:
        player_roo_raw = player_roo_raw[player_roo_raw['Team'].isin(team_select2)]

    # Drop whichever bookkeeping columns are present. errors='ignore' replaces
    # the bare try/except, which kept all of them if any single one was absent.
    st.session_state['player_roo'] = player_roo_raw.drop(
        columns=['site', 'slate', 'version', 'timestamp'], errors='ignore'
    )

    if view_var == "Simple":
        st.session_state['player_roo'] = st.session_state['player_roo'][['Player', 'Position', 'Salary', 'Median', 'Ceiling', 'Own']]
    if view_var in ("Simple", "Advanced"):
        st.dataframe(st.session_state['player_roo'].style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(player_roo_format, precision=2), height=750, use_container_width = True, hide_index=True)
def _player_frequency_table(player_slots, lineup_count, salaries):
    """Build the per-player exposure table for a set of lineups.

    Args:
        player_slots: 2-D array of player names, one row per lineup and one
            column per roster slot.
        lineup_count: Number of lineups the percentages are relative to.
        salaries: Mapping of player name to salary; players missing from it
            get NaN.

    Returns:
        DataFrame indexed by ``Player`` with ``Salary``, ``Frequency`` and
        ``Percentage`` columns, sorted by descending frequency.
    """
    appearances = pd.Series(np.asarray(player_slots).ravel().tolist(), dtype=object).value_counts()
    # max(..., 1) avoids dividing by zero when there are no lineups at all.
    share = (appearances / max(lineup_count, 1) * 100).round(2)
    table = pd.DataFrame({
        'Player': appearances.index,
        'Frequency': appearances.values,
        'Percentage': share.values,
    })
    table['Salary'] = table['Player'].map(salaries)
    table = table[['Player', 'Salary', 'Frequency', 'Percentage']]
    return table.sort_values('Frequency', ascending=False).set_index('Player')


# Column layout of the seed arrays per (site, slate type), matching the column
# selections in init_DK_lineups / init_FD_lineups. Every layout ends with
# salary, proj, Own, so those three are addressed from the right-hand side.
_OPTIMAL_COLUMNS = {
    ('Draftkings', 'Regular'): ['SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3', 'salary', 'proj', 'Own'],
    ('Draftkings', 'Showdown'): ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'salary', 'proj', 'Own'],
    ('Fanduel', 'Regular'): ['P', 'C_1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3', 'UTIL', 'salary', 'proj', 'Own'],
    ('Fanduel', 'Showdown'): ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'salary', 'proj', 'Own'],
}

with tab3:
    st.header("Optimals")
    with st.expander("Info and Filters"):
        with st.container():
            slate_type_var3 = st.radio("Which slate type are you loading?", ('Regular', 'Showdown'), key='slate_type_var3')
            slate_var3 = st.radio("Which slate data are you loading?", ('Main', 'Secondary', 'Auxiliary'), key='slate_var3')

            # Load the lineup pool of the selected site only.
            if site_var == 'Draftkings':
                dk_lineups = init_DK_lineups(slate_type_var3, slate_var3)
                site_lineups = dk_lineups
            elif site_var == 'Fanduel':
                fd_lineups = init_FD_lineups(slate_type_var3, slate_var3)
                site_lineups = fd_lineups

            lineup_num_var = st.number_input("How many lineups do you want to display?", min_value=1, max_value=1000, value=150, step=1)

            # Baselines follow the slate type for both sites. The Fanduel
            # branch used to overwrite this with the Regular frame.
            if slate_type_var3 == 'Regular':
                raw_baselines = roo_data
                ROO_slice = raw_baselines[raw_baselines['Site'] == site_var]
            elif slate_type_var3 == 'Showdown':
                raw_baselines = sd_roo_data
                ROO_slice = raw_baselines
            player_salaries = dict(zip(ROO_slice['Player'], ROO_slice['Salary']))

            # Ownership is the last column in every layout.
            min_own = np.min(site_lineups[:, -1])
            max_own = np.max(site_lineups[:, -1])

            # convert_df() reads this module-level name, so it must match the
            # width of the arrays being exported.
            column_names = _OPTIMAL_COLUMNS[(site_var, slate_type_var3)]

            player_var1 = st.radio("Do you want a frame with specific Players?", ('Full Slate', 'Specific Players'), key='player_var1')
            if player_var1 == 'Specific Players':
                player_var2 = st.multiselect('Which players do you want?', options = raw_baselines['Player'].unique())
            elif player_var1 == 'Full Slate':
                player_var2 = raw_baselines.Player.values.tolist()

            # Rebuild the working seed from the full pool on every run.
            # Filtering the previous run's seed narrowed it cumulatively and
            # kept stale lineups after a site or slate change.
            if player_var1 == 'Specific Players':
                # np.equal.outer gives (lineup, column, player). Keep lineups
                # in which every selected player appears in some column.
                has_all_players = np.equal.outer(site_lineups, player_var2).any(axis=1).all(axis=1)
                st.session_state.working_seed = site_lineups[has_all_players]
            else:
                st.session_state.working_seed = site_lineups.copy()
            st.session_state.data_export_display = pd.DataFrame(st.session_state.working_seed[0:lineup_num_var], columns=column_names)

            # NOTE(review): exports contain player names as stored in the seed
            # frames; no name-to-ID lookup table is visible in this file.
            if st.button("Prepare data export", key='data_export'):
                data_export = st.session_state.working_seed.copy()
                st.download_button(
                    label="Export optimals set",
                    data=convert_df(data_export),
                    file_name='MLB_optimals_export.csv',
                    mime='text/csv',
                )

    export_file = st.session_state.data_export_display.copy()

    with st.container():
        if st.button("Reset Optimals", key='reset3'):
            # Iterate over a snapshot of the keys: entries are deleted inside the loop.
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.session_state.working_seed = site_lineups.copy()
        if 'data_export_display' in st.session_state:
            st.dataframe(st.session_state.data_export_display.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(precision=2), height=500, use_container_width = True)
            st.download_button(
                label="Export display optimals",
                data=convert_df(export_file),
                file_name='MLB_display_optimals.csv',
                mime='text/csv',
            )

    with st.container():
        if 'working_seed' in st.session_state:
            working_seed = st.session_state.working_seed
            if len(working_seed) == 0:
                # np.min / np.max raise on empty input, so skip the table.
                st.warning("No lineups match the current player selection.")
            else:
                # Summary statistics over the trailing salary / proj / Own columns.
                summary = {'Metric': ['Min', 'Average', 'Max', 'STDdev']}
                for stat_label, column_offset in (('Salary', -3), ('Proj', -2), ('Own', -1)):
                    stat_values = working_seed[:, column_offset].astype(float)
                    summary[stat_label] = [
                        np.min(stat_values),
                        np.mean(stat_values),
                        np.max(stat_values),
                        np.std(stat_values),
                    ]
                summary_df = pd.DataFrame(summary).set_index('Metric')

                st.subheader("Optimal Statistics")
                st.dataframe(summary_df.style.format({
                    'Salary': '{:.2f}',
                    'Proj': '{:.2f}',
                    'Own': '{:.2f}'
                }).background_gradient(cmap='RdYlGn', axis=0, subset=['Salary', 'Proj', 'Own']), use_container_width=True)

    with st.container():
        tab1, tab2 = st.tabs(["Display Frequency", "Seed Frame Frequency"])
        with tab1:
            if 'data_export_display' in st.session_state:
                displayed = st.session_state.data_export_display
                # Every column except the trailing salary / proj / Own is a
                # roster slot. Percentages are relative to the rows actually
                # shown, which can be fewer than lineup_num_var.
                summary_df = _player_frequency_table(displayed.iloc[:, :-3].to_numpy(), len(displayed), player_salaries)

                st.write("Player Frequency Table:")
                st.dataframe(summary_df.style.format({'Percentage': '{:.2f}%'}), height=500, use_container_width=True)
                st.download_button(
                    label="Export player frequency",
                    data=convert_df_to_csv(summary_df),
                    file_name='MLB_player_frequency.csv',
                    mime='text/csv',
                )
        with tab2:
            if 'working_seed' in st.session_state:
                working_seed = st.session_state.working_seed
                summary_df = _player_frequency_table(working_seed[:, :-3], len(working_seed), player_salaries)

                st.write("Seed Frame Frequency Table:")
                st.dataframe(summary_df.style.format({'Percentage': '{:.2f}%'}), height=500, use_container_width=True)
                st.download_button(
                    label="Export seed frame frequency",
                    data=convert_df_to_csv(summary_df),
                    file_name='MLB_seed_frame_frequency.csv',
                    mime='text/csv',
                )