MLB_DFS_ROO / app.py
James McCool
Refactor app.py to streamline the initialization process by moving the page configuration to the top and removing unnecessary global variable deletions, enhancing code clarity and organization.
d077a00
raw
history blame
24.3 kB
import streamlit as st
import numpy as np
import pandas as pd
import pymongo
# Switch the page to the wide layout; this runs before any other Streamlit
# element in this script is rendered.
st.set_page_config(layout="wide")
@st.cache_resource
def init_conn():
    """Open the MongoDB connection and return the two database handles.

    The connection string is read from the ``mongo_uri`` Streamlit secret.
    Because the function is wrapped in ``st.cache_resource``, the handles are
    created once and then reused.

    Returns:
        A ``(db, db2)`` tuple: the ``MLB_Database`` handle followed by the
        ``MLB_DFS`` handle.
    """
    mongo_client = pymongo.MongoClient(
        st.secrets['mongo_uri'],
        retryWrites=True,
        serverSelectionTimeoutMS=500000,
    )
    return mongo_client["MLB_Database"], mongo_client["MLB_DFS"]


db, db2 = init_conn()
# Styler format strings for the team-level scoring table (percent, 2 decimals).
game_format = {
    'Win Percentage': '{:.2%}',
    'First Inning Lead Percentage': '{:.2%}',
    'Fifth Inning Lead Percentage': '{:.2%}',
    '8+ runs': '{:.2%}',
    'DK LevX': '{:.2%}',
    'FD LevX': '{:.2%}',
}

# Styler format strings for the player range-of-outcomes table.
player_roo_format = {
    'Top_finish': '{:.2%}',
    'Top_5_finish': '{:.2%}',
    'Top_10_finish': '{:.2%}',
    '20+%': '{:.2%}',
    '2x%': '{:.2%}',
    '3x%': '{:.2%}',
    '4x%': '{:.2%}',
}

# Column labels applied to the seed-frame arrays in the Optimals tab.
# The Draftkings seed frame has six player columns plus salary/proj/Own.
dk_columns = ['FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'FLEX6', 'salary', 'proj', 'Own']
# The Fanduel seed frame returned by init_FD_lineups has only five player
# columns (eight columns in total), so the label list must have eight entries;
# a nine-entry list makes pd.DataFrame(seed, columns=fd_columns) raise ValueError.
fd_columns = ['FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'salary', 'proj', 'Own']
# Stylesheet injected into the page to restyle the Streamlit tab bar:
# gold tabs, a brighter border on the selected tab, and a hover highlight.
_TAB_STYLE_CSS = """
<style>
/* Tab styling */
.stTabs [data-baseweb="tab-list"] {
gap: 8px;
padding: 4px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
white-space: pre-wrap;
background-color: #DAA520;
color: white;
border-radius: 10px;
gap: 1px;
padding: 10px 20px;
font-weight: bold;
transition: all 0.3s ease;
}
.stTabs [aria-selected="true"] {
background-color: #DAA520;
border: 3px solid #FFD700;
color: white;
}
.stTabs [data-baseweb="tab"]:hover {
background-color: #FFD700;
cursor: pointer;
}
</style>"""

# unsafe_allow_html is required for the raw <style> tag to be emitted.
st.markdown(_TAB_STYLE_CSS, unsafe_allow_html=True)
@st.cache_data(ttl=60)
def init_baselines():
    """Load the player and team baseline tables from MongoDB.

    The function is cached with ``st.cache_data`` rather than
    ``st.cache_resource`` for two reasons: the "Load/Reset Data" buttons call
    ``st.cache_data.clear()``, which only affects ``cache_data`` entries, and
    ``cache_data`` hands each caller its own copy of the DataFrames instead of
    one shared mutable object.

    Returns:
        A ``(roo_data, sd_roo_data, scoring_percentages)`` tuple of DataFrames:
        the ``Player_Range_Of_Outcomes`` collection, the
        ``Player_SD_Range_Of_Outcomes`` collection, and a trimmed view of
        ``Scoring_Percentages`` whose ``8+ runs`` and ``Win Percentage``
        columns are converted from percent strings to fractions.
    """
    roo_frame = pd.DataFrame(list(db["Player_Range_Of_Outcomes"].find()))
    roo_data = roo_frame.drop(columns=['_id'])
    roo_data['Salary'] = roo_data['Salary'].astype(int)

    sd_frame = pd.DataFrame(list(db["Player_SD_Range_Of_Outcomes"].find()))
    sd_roo_data = sd_frame.drop(columns=['_id'])
    sd_roo_data['Salary'] = sd_roo_data['Salary'].astype(int)

    team_frame = pd.DataFrame(list(db["Scoring_Percentages"].find()))
    scoring_columns = [
        'Names', 'Avg First Inning', 'First Inning Lead Percentage',
        'Avg Fifth Inning', 'Fifth Inning Lead Percentage', 'Avg Score',
        '8+ runs', 'Win Percentage',
    ]
    # .copy() so the column assignments below write to an independent frame
    # instead of a slice of team_frame (avoids SettingWithCopyWarning).
    scoring_percentages = team_frame.drop(columns=['_id'])[scoring_columns].copy()
    # Strip any '%' characters and convert the value to a fraction.
    for pct_column in ('8+ runs', 'Win Percentage'):
        scoring_percentages[pct_column] = (
            scoring_percentages[pct_column].replace('%', '', regex=True).astype(float) / 100
        )

    return roo_data, sd_roo_data, scoring_percentages
@st.cache_data(ttl=60)
def init_DK_lineups(slate='Main'):
    """Load up to 10,000 Draftkings seed lineups as a NumPy array.

    Args:
        slate: Slate label passed by the callers in this file ('Main',
            'Secondary' or 'Auxiliary'). The parameter exists so those calls
            no longer raise ``TypeError``; it takes part in the cache key but
            does not change which collection is read.

    Returns:
        A 2-D array with the columns CPT, FLEX1-FLEX5, salary, proj, Own
        (nine columns, in that order).
    """
    # NOTE(review): only the 'DK_MLB_SD1_seed_frame' collection is visible in
    # this file. Map `slate` to per-slate collections once their names are
    # confirmed.
    seed_columns = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'salary', 'proj', 'Own']
    cursor = db2['DK_MLB_SD1_seed_frame'].find().limit(10000)
    raw_display = pd.DataFrame(list(cursor))
    return raw_display[seed_columns].to_numpy()
@st.cache_data(ttl=60)
def init_FD_lineups(slate='Main'):
    """Load up to 10,000 Fanduel seed lineups as a NumPy array.

    Args:
        slate: Slate label passed by the callers in this file ('Main',
            'Secondary' or 'Auxiliary'). The parameter exists so those calls
            no longer raise ``TypeError``; it takes part in the cache key but
            does not change which collection is read.

    Returns:
        A 2-D array with the columns CPT, FLEX1-FLEX4, salary, proj, Own
        (eight columns, in that order; one fewer than the Draftkings frame).
    """
    # NOTE(review): only the 'FD_MLB_SD1_seed_frame' collection is visible in
    # this file. Map `slate` to per-slate collections once their names are
    # confirmed.
    seed_columns = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'salary', 'proj', 'Own']
    cursor = db2['FD_MLB_SD1_seed_frame'].find().limit(10000)
    raw_display = pd.DataFrame(list(cursor))
    return raw_display[seed_columns].to_numpy()
def convert_df_to_csv(df):
    """Serialize a DataFrame (index included) to UTF-8 encoded CSV bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
@st.cache_data
def convert_df(array):
    """Label a lineup array and return it as UTF-8 encoded CSV bytes.

    The column labels come from the module-level ``column_names`` variable,
    which must be assigned before this function is called.
    """
    labelled = pd.DataFrame(array, columns=column_names)
    csv_text = labelled.to_csv()
    return csv_text.encode('utf-8')
# Fetch the three baseline DataFrames used by every tab below.
roo_data, sd_roo_data, scoring_percentages = init_baselines()
# Plain assignment: hold_display is a second name for the same DataFrame
# object as roo_data, not a copy.
hold_display = roo_data
# Top-level navigation: one tab per report.
tab1, tab2, tab3 = st.tabs(["Scoring Percentages", "Player ROO", "Optimals"])
# --- Tab 1: team-level scoring percentages ---------------------------------
with tab1:
    st.header("Scoring Percentages")

    with st.container():
        col1, col2 = st.columns([3, 3])
        with col1:
            view_var1 = st.selectbox("Select view", ["Simple", "Advanced"], key='view_var1')
        with col2:
            site_var1 = st.selectbox("What site do you want to view?", ('Draftkings', 'Fanduel'), key='site_var1')

    with st.expander("Info and Filters"):
        col1, col2, col3 = st.columns([3, 3, 3])
        with col1:
            if st.button("Load/Reset Data", key='reset1'):
                st.cache_data.clear()
                # Clear init_baselines' own cache explicitly so the reload
                # below always hits the database, whichever cache decorator
                # the function uses.
                init_baselines.clear()
                roo_data, sd_roo_data, scoring_percentages = init_baselines()
                hold_display = roo_data
                # The lineup loaders are called without arguments so the call
                # is valid for a zero-parameter definition.
                dk_lineups = init_DK_lineups()
                fd_lineups = init_FD_lineups()
                # Snapshot the keys first: deleting while iterating a live
                # key view is unsafe.
                for key in list(st.session_state.keys()):
                    del st.session_state[key]
        with col2:
            slate_var1 = st.radio("Which data are you loading?", ('Main Slate', 'Secondary Slate', 'All Games'))
        with col3:
            own_var1 = st.radio("How would you like to display team ownership?", ('Sum', 'Average'))

    st.title("Scoring Percentages")

    # The Simple view shows a four-column subset; Advanced shows every column.
    if view_var1 == "Simple":
        scoring_display = scoring_percentages[['Names', 'Avg Score', '8+ runs', 'Win Percentage']]
    else:
        scoring_display = scoring_percentages

    # A single RdYlGn gradient is applied. The original chained a default
    # gradient first, which the second call overrode on the same cells.
    st.dataframe(
        scoring_display.style.background_gradient(cmap='RdYlGn').format(game_format, precision=2),
        height=750,
        use_container_width=True,
        hide_index=True,
    )
# --- Tab 2: player range of outcomes ---------------------------------------
with tab2:
    st.header("Player ROO")

    with st.container():
        col1, col2 = st.columns([3, 3])
        with col1:
            view_var2 = st.selectbox("Select view", ["Simple", "Advanced"], key='view_var2')
        with col2:
            site_var2 = st.selectbox("What site do you want to view?", ('Draftkings', 'Fanduel'), key='site_var2')

    with st.expander("Info and Filters"):
        col1, col2, col3, col4 = st.columns([3, 3, 3, 3])
        with col1:
            if st.button("Load/Reset Data", key='reset2'):
                st.cache_data.clear()
                # Clear init_baselines' own cache explicitly so the reload
                # below always hits the database, whichever cache decorator
                # the function uses.
                init_baselines.clear()
                roo_data, sd_roo_data, scoring_percentages = init_baselines()
                hold_display = roo_data
                # The lineup loaders are called without arguments so the call
                # is valid for a zero-parameter definition.
                dk_lineups = init_DK_lineups()
                fd_lineups = init_FD_lineups()
                # Snapshot the keys first: deleting while iterating a live
                # key view is unsafe.
                for key in list(st.session_state.keys()):
                    del st.session_state[key]
        with col2:
            slate_type_var2 = st.radio("Which slate type are you loading?", ('Regular', 'Showdown'))
        with col3:
            slate_var2 = st.radio("Which slate data are you loading?", ('Main', 'Secondary', 'Auxiliary'))
        with col4:
            pos_var2 = st.radio("Which position group would you like to view?", ('All', 'Pitchers', 'Hitters'))

    if slate_type_var2 == 'Regular':
        player_roo_raw = roo_data.copy()

        # The Optimals tab filters this same frame on its 'Site' column, so
        # restrict the rows to the chosen site rather than overwriting the
        # column (overwriting would relabel the other site's rows).
        # NOTE(review): falls back to the old labelling if 'Site' is absent.
        if 'Site' in player_roo_raw.columns:
            player_roo_raw = player_roo_raw[player_roo_raw['Site'] == site_var2]
        else:
            player_roo_raw['Site'] = site_var2

        # Pitchers are labelled 'SP' on Draftkings and 'P' on Fanduel.
        pitcher_label = 'SP' if site_var2 == 'Draftkings' else 'P'
        if pos_var2 == 'Pitchers':
            player_roo_raw = player_roo_raw[player_roo_raw['Position'] == pitcher_label]
        elif pos_var2 == 'Hitters':
            player_roo_raw = player_roo_raw[player_roo_raw['Position'] != pitcher_label]

        # The radio options are exactly the values stored in 'Slate'.
        player_roo_raw = player_roo_raw[player_roo_raw['Slate'] == slate_var2]
    elif slate_type_var2 == 'Showdown':
        player_roo_raw = sd_roo_data.copy()
        player_roo_raw['Site'] = site_var2

    # errors='ignore': drop the bookkeeping columns when present, but do not
    # raise KeyError when a collection does not carry them.
    player_roo_display = player_roo_raw.drop(
        columns=['site', 'slate', 'version', 'timestamp'], errors='ignore'
    )
    if view_var2 == "Simple":
        player_roo_display = player_roo_display[['Player', 'Position', 'Salary', 'Median', 'Ceiling', 'Own']]
    st.session_state['player_roo'] = player_roo_display

    st.dataframe(
        st.session_state['player_roo'].style.background_gradient(cmap='RdYlGn').format(player_roo_format, precision=2),
        height=750,
        use_container_width=True,
        hide_index=True,
    )
def _player_frequency_table(player_values, lineup_count, salary_lookup):
    """Build a per-player frequency table from a 2-D block of lineup slots.

    Args:
        player_values: 2-D array-like holding only the player-name columns.
        lineup_count: Number of lineups the block represents; used as the
            denominator of the percentage column.
        salary_lookup: Mapping of player name to salary.

    Returns:
        DataFrame indexed by player with Salary, Frequency and Percentage
        columns, sorted by Frequency in descending order.
    """
    flat_names = np.asarray(player_values, dtype=object).ravel().tolist()
    counts = pd.Series(flat_names, dtype=object).value_counts()
    # max(..., 1) keeps an empty selection from dividing by zero.
    shares = (counts / max(lineup_count, 1) * 100).round(2)
    table = pd.DataFrame({
        'Player': counts.index,
        'Frequency': counts.values,
        'Percentage': shares.values,
    })
    table['Salary'] = table['Player'].map(salary_lookup)
    table = table[['Player', 'Salary', 'Frequency', 'Percentage']]
    return table.sort_values('Frequency', ascending=False).set_index('Player')


# --- Tab 3: optimal lineups -------------------------------------------------
with tab3:
    st.header("Optimals")

    with st.container():
        col1, col2 = st.columns([3, 3])
        with col1:
            site_var3 = st.selectbox("What site do you want to view?", ('Draftkings', 'Fanduel'), key='site_var3')
        with col2:
            view_var3 = st.selectbox("Select view", ["Simple", "Advanced"], key='view_var3')

    with st.expander("Info and Filters"):
        if st.button("Load/Reset Data", key='reset3'):
            st.cache_data.clear()
            # Clear init_baselines' own cache explicitly so the reload below
            # always hits the database, whichever cache decorator it uses.
            init_baselines.clear()
            roo_data, sd_roo_data, scoring_percentages = init_baselines()
            hold_display = roo_data
            # Snapshot the keys first: deleting while iterating a live key
            # view is unsafe.
            for key in list(st.session_state.keys()):
                del st.session_state[key]

        slate_type_var3 = st.radio("Which slate type are you loading?", ('Regular', 'Showdown'))
        slate_var3 = st.radio("Which slate data are you loading?", ('Main', 'Secondary', 'Auxiliary'))

        # The Regular and Showdown branches issued identical loader calls, so
        # only the site matters here. The loaders are called without
        # arguments so the call is valid for a zero-parameter definition.
        # NOTE(review): slate_var3 is collected but does not yet select a
        # different seed frame.
        if site_var3 == 'Draftkings':
            dk_lineups = init_DK_lineups()
            site_lineups = dk_lineups
            site_columns = dk_columns
        else:
            fd_lineups = init_FD_lineups()
            site_lineups = fd_lineups
            site_columns = fd_columns

        lineup_num_var = st.number_input("How many lineups do you want to display?", min_value=1, max_value=1000, value=150, step=1)

        # Baselines follow the slate type for both sites. The Fanduel branch
        # previously overrode this with the regular-slate frame even when
        # Showdown was selected.
        if slate_type_var3 == 'Regular':
            raw_baselines = roo_data
            ROO_slice = raw_baselines[raw_baselines['Site'] == site_var3]
            player_salaries = dict(zip(ROO_slice['Player'], ROO_slice['Salary']))
        else:
            raw_baselines = sd_roo_data
            player_salaries = dict(zip(raw_baselines['Player'], raw_baselines['Salary']))

        # The seed frames end with salary, proj, Own, and everything before
        # those is a player slot. Indexing from the end works for both the
        # nine-column Draftkings frame and the eight-column Fanduel frame,
        # where fixed indices 6/7/8 raised IndexError.
        stat_columns = list(site_columns[-3:])
        n_player_cols = site_lineups.shape[1] - len(stat_columns)
        column_names = [f'FLEX{slot}' for slot in range(1, n_player_cols + 1)] + stat_columns

        # Ownership range across the loaded lineups (last column is 'Own').
        min_own = np.min(site_lineups[:, -1])
        max_own = np.max(site_lineups[:, -1])

        player_var1 = st.radio("Do you want a frame with specific Players?", ('Full Slate', 'Specific Players'), key='player_var1')
        if player_var1 == 'Specific Players':
            player_var2 = st.multiselect('Which players do you want?', options=raw_baselines['Player'].unique())
        else:
            player_var2 = raw_baselines.Player.values.tolist()

        # Prepare the working seed BEFORE the export button so the button
        # never reads a session key that has not been created yet. A seed
        # left over from the other site is discarded, because its column
        # count and players would not match the current labels.
        if ('working_seed' not in st.session_state
                or st.session_state.get('working_seed_site') != site_var3):
            st.session_state.working_seed = site_lineups.copy()
            st.session_state.working_seed_site = site_var3

        if player_var1 == 'Specific Players':
            current_seed = st.session_state.working_seed
            # Keep a lineup only when every selected player appears in at
            # least one of its cells. The filter is applied to the current
            # working seed, so it narrows cumulatively until a reset.
            keep_rows = np.equal.outer(current_seed, player_var2).any(axis=1).all(axis=1)
            st.session_state.working_seed = current_seed[keep_rows]
        elif player_var1 == 'Full Slate':
            st.session_state.working_seed = site_lineups.copy()

        st.session_state.data_export_display = pd.DataFrame(
            st.session_state.working_seed[0:lineup_num_var], columns=column_names
        )

        if st.button("Prepare data export", key='data_export'):
            data_export = st.session_state.working_seed.copy()
            # NOTE: mapping player names to site IDs (via an `id_dict`) is
            # disabled in this version; names are exported as-is.
            st.download_button(
                label="Export optimals set",
                data=convert_df(data_export),
                file_name='MLB_optimals_export.csv',
                mime='text/csv',
            )

    export_file = st.session_state.data_export_display.copy()

    with st.container():
        # The key must be unique across the page; 'reset3' is already taken
        # by the "Load/Reset Data" button above.
        if st.button("Reset Optimals", key='reset_optimals'):
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.session_state.working_seed = site_lineups.copy()
            st.session_state.working_seed_site = site_var3

        if 'data_export_display' in st.session_state:
            st.dataframe(
                st.session_state.data_export_display.style.background_gradient(cmap='RdYlGn').format(precision=2),
                height=500,
                use_container_width=True,
            )
            st.download_button(
                label="Export display optimals",
                data=convert_df(export_file),
                file_name='MLB_display_optimals.csv',
                mime='text/csv',
            )

    with st.container():
        if 'working_seed' in st.session_state:
            seed_frame = st.session_state.working_seed
            if len(seed_frame) > 0:
                # Summary statistics over the three trailing stat columns.
                summary_data = {'Metric': ['Min', 'Average', 'Max', 'STDdev']}
                for label, position in (('Salary', -3), ('Proj', -2), ('Own', -1)):
                    # Cast to float: the seed array is object-typed because
                    # it mixes player names with numbers.
                    values = seed_frame[:, position].astype(float)
                    summary_data[label] = [np.min(values), np.mean(values), np.max(values), np.std(values)]
                summary_df = pd.DataFrame(summary_data).set_index('Metric')

                st.subheader("Optimal Statistics")
                st.dataframe(
                    summary_df.style.format({
                        'Salary': '{:.2f}',
                        'Proj': '{:.2f}',
                        'Own': '{:.2f}',
                    }).background_gradient(cmap='RdYlGn', axis=0, subset=['Salary', 'Proj', 'Own']),
                    use_container_width=True,
                )
            else:
                # np.min/np.max raise on an empty array, so report instead.
                st.info("No lineups match the current player selection.")

    with st.container():
        # Distinct names so the page-level tab1/tab2 are not rebound.
        display_freq_tab, seed_freq_tab = st.tabs(["Display Frequency", "Seed Frame Frequency"])

        with display_freq_tab:
            if 'data_export_display' in st.session_state:
                shown_lineups = st.session_state.data_export_display
                # Divide by the number of lineups actually shown, which can
                # be smaller than the number requested.
                summary_df = _player_frequency_table(
                    shown_lineups.iloc[:, :-3].values, len(shown_lineups), player_salaries
                )
                st.write("Player Frequency Table:")
                st.dataframe(summary_df.style.format({'Percentage': '{:.2f}%'}), height=500, use_container_width=True)
                st.download_button(
                    label="Export player frequency",
                    data=convert_df_to_csv(summary_df),
                    file_name='MLB_player_frequency.csv',
                    mime='text/csv',
                )

        with seed_freq_tab:
            if 'working_seed' in st.session_state:
                seed_frame = st.session_state.working_seed
                summary_df = _player_frequency_table(
                    seed_frame[:, :-3], len(seed_frame), player_salaries
                )
                st.write("Seed Frame Frequency Table:")
                st.dataframe(summary_df.style.format({'Percentage': '{:.2f}%'}), height=500, use_container_width=True)
                st.download_button(
                    label="Export seed frame frequency",
                    data=convert_df_to_csv(summary_df),
                    file_name='MLB_seed_frame_frequency.csv',
                    mime='text/csv',
                )