# MLB_DFS_ROO / app.py
# James McCool
# Commit 6fa8eec: Update scoring percentages in app.py to convert percentage
# strings to decimal format for '8+ runs' and 'Win Percentage', enhancing data
# accuracy and consistency.
# File-viewer residue: raw / history / blame / 20.6 kB
import streamlit as st

# Render the app with the wide page layout.
st.set_page_config(layout="wide")

# Remove every public (non-underscore) name bound at module level, `st`
# included; the imports that follow bind `st` again. `dir()` returns a list
# built before the loop starts, so deleting entries while looping is safe.
# NOTE(review): presumably meant to start each rerun from a clean namespace.
for name in dir():
    if name.startswith('_'):
        continue
    globals().pop(name)
import numpy as np
import pandas as pd
import streamlit as st
import gspread
import pymongo
@st.cache_resource
def init_conn():
    """Create the MongoDB client and return the two database handles.

    The connection URI is read from ``st.secrets['mongo_uri']``.

    Returns:
        tuple: ``(client["MLB_Database"], client["MLB_DFS"])``.
    """
    mongo_client = pymongo.MongoClient(
        st.secrets['mongo_uri'],
        retryWrites=True,
        serverSelectionTimeoutMS=500000,
    )
    return mongo_client["MLB_Database"], mongo_client["MLB_DFS"]


db, db2 = init_conn()
# Styler format maps: the listed columns hold fractions and are rendered as
# percentages with two decimals.
game_format = {
    'Win Percentage': '{:.2%}',
    'First Inning Lead Percentage': '{:.2%}',
    'Fifth Inning Lead Percentage': '{:.2%}',
    '8+ runs': '{:.2%}',
    'DK LevX': '{:.2%}',
    'FD LevX': '{:.2%}',
}
player_roo_format = {
    'Top_finish': '{:.2%}',
    'Top_5_finish': '{:.2%}',
    'Top_10_finish': '{:.2%}',
    '20+%': '{:.2%}',
    '2x%': '{:.2%}',
    '3x%': '{:.2%}',
    '4x%': '{:.2%}',
}

# Column labels applied to the lineup seed arrays when they are turned back
# into DataFrames. Each list must match, name for name and in order, the
# columns selected by init_DK_lineups / init_FD_lineups: a label count that
# differs from the array width makes pd.DataFrame(..., columns=...) raise, and
# a shifted label hides the CPT slot.
dk_columns = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'salary', 'proj', 'Own']
fd_columns = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'salary', 'proj', 'Own']
# Custom CSS for the Streamlit tab bar: spacing of the tab list, the look of
# each tab, the selected-tab border and the hover colour.
tab_css = """
<style>
/* Tab styling */
.stTabs [data-baseweb="tab-list"] {
gap: 8px;
padding: 4px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
white-space: pre-wrap;
background-color: #DAA520;
color: white;
border-radius: 10px;
gap: 1px;
padding: 10px 20px;
font-weight: bold;
transition: all 0.3s ease;
}
.stTabs [aria-selected="true"] {
background-color: #DAA520;
border: 3px solid #FFD700;
color: white;
}
.stTabs [data-baseweb="tab"]:hover {
background-color: #FFD700;
cursor: pointer;
}
</style>"""
st.markdown(tab_css, unsafe_allow_html=True)
@st.cache_data(ttl=60)
def init_baselines():
    """Load the player range-of-outcomes tables and the team scoring table.

    Reads the ``Player_Range_Of_Outcomes``, ``Player_SD_Range_Of_Outcomes``
    and ``Scoring_Percentages`` collections from the module-level ``db``
    handle, drops Mongo's ``_id`` column from each, casts ``Salary`` to int on
    the two player frames, and converts the ``'8+ runs'`` and
    ``'Win Percentage'`` columns from percent strings to fractions.

    The result is cached with ``st.cache_data`` rather than
    ``st.cache_resource``: the "Load/Reset Data" buttons call
    ``st.cache_data.clear()``, which only affects ``st.cache_data`` caches,
    and data caching hands each caller its own copy of the frames instead of
    one shared mutable object.

    Returns:
        tuple: ``(roo_data, sd_roo_data, scoring_percentages)`` DataFrames.
    """

    def _read_collection(name):
        """Return every document of ``db[name]`` as a frame without ``_id``."""
        return pd.DataFrame(db[name].find()).drop(columns=['_id'])

    roo_data = _read_collection("Player_Range_Of_Outcomes")
    roo_data['Salary'] = roo_data['Salary'].astype(int)

    sd_roo_data = _read_collection("Player_SD_Range_Of_Outcomes")
    sd_roo_data['Salary'] = sd_roo_data['Salary'].astype(int)

    scoring_columns = [
        'Names', 'Avg First Inning', 'First Inning Lead Percentage',
        'Avg Fifth Inning', 'Fifth Inning Lead Percentage', 'Avg Score',
        '8+ runs', 'Win Percentage',
    ]
    # .copy() makes the column subset an independent frame, so the
    # assignments below never write through to (or warn about) a slice.
    scoring_percentages = _read_collection("Scoring_Percentages")[scoring_columns].copy()

    # Strip any '%' sign and rescale to a fraction so the '{:.2%}' display
    # formats render these columns correctly.
    for pct_col in ('8+ runs', 'Win Percentage'):
        scoring_percentages[pct_col] = (
            scoring_percentages[pct_col].replace('%', '', regex=True).astype(float) / 100
        )

    return roo_data, sd_roo_data, scoring_percentages
@st.cache_data(ttl = 60)
def init_DK_lineups():
    """Read the ``DK_MLB_SD1_seed_frame`` collection into a NumPy array.

    At most 10000 documents are fetched from the module-level ``db2`` handle.

    Returns:
        numpy.ndarray: one row per lineup with nine columns, in this order:
        CPT, FLEX1-FLEX5, salary, proj, Own.
    """
    seed_columns = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'salary', 'proj', 'Own']
    documents = list(db2['DK_MLB_SD1_seed_frame'].find().limit(10000))
    lineup_frame = pd.DataFrame(documents)
    return lineup_frame[seed_columns].to_numpy()
@st.cache_data(ttl = 60)
def init_FD_lineups():
    """Read the ``FD_MLB_SD1_seed_frame`` collection into a NumPy array.

    At most 10000 documents are fetched from the module-level ``db2`` handle.

    Returns:
        numpy.ndarray: one row per lineup with eight columns, in this order:
        CPT, FLEX1-FLEX4, salary, proj, Own.
    """
    seed_columns = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'salary', 'proj', 'Own']
    documents = list(db2['FD_MLB_SD1_seed_frame'].find().limit(10000))
    lineup_frame = pd.DataFrame(documents)
    return lineup_frame[seed_columns].to_numpy()
def convert_df_to_csv(df):
    """Serialize *df* to CSV text (index included) and return it as UTF-8 bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
@st.cache_data
def convert_df(array):
    """Label *array* with the current column names and return UTF-8 CSV bytes.

    The labels come from the module-level ``column_names``, which must be
    assigned before this function is called. The CSV includes the index.
    """
    labelled = pd.DataFrame(array, columns=column_names)
    csv_text = labelled.to_csv()
    return csv_text.encode('utf-8')
# Fetch the baseline frames; hold_display keeps a second reference to the
# regular-slate player frame.
roo_data, sd_roo_data, scoring_percentages = init_baselines()
hold_display = roo_data

view_var = st.radio("Select view", ["Simple", "Advanced"])
tab1, tab2, tab3 = st.tabs(["Scoring Percentages", "Player ROO", "Optimals"])

# --- Tab 1: team scoring percentages -------------------------------------
with tab1:
    with st.expander("Info and Filters"):
        col1, col2, col3, col4 = st.columns([3, 3, 3, 3])

        with col1:
            reload_requested = st.button("Load/Reset Data", key='reset1')
            if reload_requested:
                # Clear the st.cache_data caches and read the baselines again.
                st.cache_data.clear()
                roo_data, sd_roo_data, scoring_percentages = init_baselines()
                hold_display = roo_data

        with col2:
            site_var1 = st.radio(
                "What site are you working with?",
                ('Draftkings', 'Fanduel'),
                key='site_var1',
            )

        with col3:
            slate_var1 = st.radio(
                "Which data are you loading?",
                ('Main Slate', 'Secondary Slate', 'All Games'),
                key='slate_var1',
            )

        with col4:
            own_var1 = st.radio(
                "How would you like to display team ownership?",
                ('Sum', 'Average'),
                key='own_var1',
            )

    st.title("Scoring Percentages")
    # Colour-grade the table and render the percentage columns via game_format.
    scoring_styler = (
        scoring_percentages.style
        .background_gradient(axis=0)
        .background_gradient(cmap='RdYlGn')
        .format(game_format, precision=2)
    )
    st.dataframe(scoring_styler, height=750, use_container_width = True, hide_index=True)
# --- Tab 2: player range of outcomes for the selected site ----------------
with tab2:
    st.title("Player ROO")
    with st.expander("Info and Filters"):
        site_var = st.radio("Select site", ["Draftkings", "Fanduel"])

    # Each site is shown for one fixed slate label.
    site_slates = {"Draftkings": 'DK SD1', "Fanduel": 'FD SD1'}
    site_rows = sd_roo_data[sd_roo_data['site'] == site_var]
    display_data = site_rows[site_rows['slate'] == site_slates[site_var]]

    # Bookkeeping columns are not shown in the table.
    display_data = display_data.drop(columns=['site', 'slate', 'version', 'timestamp'])

    roo_styler = (
        display_data.style
        .background_gradient(axis=0)
        .background_gradient(cmap='RdYlGn')
        .format(player_roo_format, precision=2)
    )
    st.dataframe(roo_styler, height=750, use_container_width = True, hide_index=True)
def _player_frequency_table(player_values, lineup_count, salary_lookup):
    """Build a per-player appearance table from a 2-D array of lineup slots.

    Args:
        player_values: 2-D array whose cells are player names.
        lineup_count: number of lineups the percentages are relative to.
        salary_lookup: mapping of player name to salary.

    Returns:
        DataFrame indexed by ``Player`` with ``Salary``, ``Frequency`` and
        ``Percentage`` columns, sorted by ``Frequency`` descending.
    """
    value_counts = pd.Series(player_values.flatten().tolist()).value_counts()
    percentages = (value_counts / lineup_count * 100).round(2)
    table = pd.DataFrame({
        'Player': value_counts.index,
        'Frequency': value_counts.values,
        'Percentage': percentages.values
    })
    table['Salary'] = table['Player'].map(salary_lookup)
    table = table[['Player', 'Salary', 'Frequency', 'Percentage']]
    return table.sort_values('Frequency', ascending=False).set_index('Player')


# --- Tab 3: optimal lineups ------------------------------------------------
with tab3:
    # Both seed-frame loaders end their column list with salary, proj, Own, so
    # those fields are addressed from the right. The DraftKings frame has nine
    # columns and the Fanduel frame eight; fixed positions 6/7/8 are only
    # valid for DraftKings and run past the end of the Fanduel array.
    SALARY_COL, PROJ_COL, OWN_COL = -3, -2, -1
    STAT_COLS = 3

    with st.expander("Info and Filters"):
        if st.button("Load/Reset Data", key='reset2'):
            st.cache_data.clear()
            roo_data, sd_roo_data, scoring_percentages = init_baselines()
            hold_display = roo_data
            dk_lineups = init_DK_lineups()
            fd_lineups = init_FD_lineups()
            # Iterate over a snapshot of the keys while entries are removed.
            for key in list(st.session_state.keys()):
                del st.session_state[key]

        slate_var1 = st.radio("Which data are you loading?", ('Regular', 'Showdown'))
        site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'))

        # Regular and Showdown read the same seed-frame loader for a site, so
        # only the site decides which array is loaded.
        if site_var1 == 'Draftkings':
            dk_lineups = init_DK_lineups()
        elif site_var1 == 'Fanduel':
            fd_lineups = init_FD_lineups()

        lineup_num_var = st.number_input("How many lineups do you want to display?", min_value=1, max_value=1000, value=150, step=1)

        if slate_var1 == 'Regular':
            raw_baselines = roo_data
        elif slate_var1 == 'Showdown':
            raw_baselines = sd_roo_data

        if site_var1 == 'Draftkings':
            site_lineups = dk_lineups
            column_names = dk_columns
        elif site_var1 == 'Fanduel':
            # NOTE(review): Fanduel always falls back to the regular-slate
            # frame, even when Showdown is selected - confirm this is intended.
            raw_baselines = hold_display
            site_lineups = fd_lineups
            column_names = fd_columns

        # Salary lookup used by the frequency tables below.
        if slate_var1 == 'Regular':
            ROO_slice = raw_baselines[raw_baselines['Site'] == site_var1]
            player_salaries = dict(zip(ROO_slice['Player'], ROO_slice['Salary']))
        elif slate_var1 == 'Showdown':
            player_salaries = dict(zip(raw_baselines['Player'], raw_baselines['Salary']))

        # Minimum and maximum ownership across the loaded lineups.
        min_own = np.min(site_lineups[:, OWN_COL])
        max_own = np.max(site_lineups[:, OWN_COL])

        player_var1 = st.radio("Do you want a frame with specific Players?", ('Full Slate', 'Specific Players'), key='player_var1')
        if player_var1 == 'Specific Players':
            player_var2 = st.multiselect('Which players do you want?', options = raw_baselines['Player'].unique())
        elif player_var1 == 'Full Slate':
            player_var2 = raw_baselines.Player.values.tolist()

        if st.button("Prepare data export", key='data_export'):
            data_export = st.session_state.working_seed.copy()
            st.download_button(
                label="Export optimals set",
                data=convert_df(data_export),
                file_name='MLB_optimals_export.csv',
                mime='text/csv',
            )

    # (Re)seed the working set when there is none yet, or when the stored one
    # has a different column count than the current site's lineups (a seed
    # left over from the other site cannot take this site's column labels).
    seed_is_stale = (
        'working_seed' not in st.session_state
        or st.session_state.working_seed.shape[1] != site_lineups.shape[1]
    )
    if seed_is_stale:
        st.session_state.working_seed = site_lineups.copy()

    if player_var1 == 'Specific Players':
        # outer() compares every cell with every selected player; any(axis=1)
        # asks "does the lineup contain this player", all(axis=1) keeps the
        # lineups that contain every selected player.
        st.session_state.working_seed = st.session_state.working_seed[np.equal.outer(st.session_state.working_seed, player_var2).any(axis=1).all(axis=1)]
    elif player_var1 == 'Full Slate':
        st.session_state.working_seed = site_lineups.copy()

    # column_names needs exactly one label per seed-frame column.
    st.session_state.data_export_display = pd.DataFrame(st.session_state.working_seed[0:lineup_num_var], columns=column_names)
    export_file = st.session_state.data_export_display.copy()

    with st.container():
        if st.button("Reset Optimals", key='reset3'):
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.session_state.working_seed = site_lineups.copy()

        if 'data_export_display' in st.session_state:
            st.dataframe(st.session_state.data_export_display.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(precision=2), height=500, use_container_width = True)
            st.download_button(
                label="Export display optimals",
                data=convert_df(export_file),
                file_name='MLB_display_optimals.csv',
                mime='text/csv',
            )

    with st.container():
        # Min/mean/max/std cannot be computed over zero lineups, so the
        # summary is skipped when the player filter removed every lineup.
        if 'working_seed' in st.session_state and len(st.session_state.working_seed) > 0:
            seed = st.session_state.working_seed
            stat_funcs = (np.min, np.mean, np.max, np.std)
            summary_df = pd.DataFrame({
                'Metric': ['Min', 'Average', 'Max', 'STDdev'],
                'Salary': [stat(seed[:, SALARY_COL]) for stat in stat_funcs],
                'Proj': [stat(seed[:, PROJ_COL]) for stat in stat_funcs],
                'Own': [stat(seed[:, OWN_COL]) for stat in stat_funcs],
            })
            summary_df = summary_df.set_index('Metric')

            st.subheader("Optimal Statistics")
            st.dataframe(summary_df.style.format({
                'Salary': '{:.2f}',
                'Proj': '{:.2f}',
                'Own': '{:.2f}'
            }).background_gradient(cmap='RdYlGn', axis=0, subset=['Salary', 'Proj', 'Own']), use_container_width=True)

    with st.container():
        tab1, tab2 = st.tabs(["Display Frequency", "Seed Frame Frequency"])

        with tab1:
            if 'data_export_display' in st.session_state:
                # Every column except the trailing salary/proj/Own is a slot.
                player_columns = st.session_state.data_export_display.iloc[:, :-STAT_COLS]
                # Percentages are relative to the rows actually displayed,
                # which can be fewer than the requested lineup count.
                summary_df = _player_frequency_table(player_columns.values, len(player_columns), player_salaries)

                st.write("Player Frequency Table:")
                st.dataframe(summary_df.style.format({'Percentage': '{:.2f}%'}), height=500, use_container_width=True)
                st.download_button(
                    label="Export player frequency",
                    data=convert_df_to_csv(summary_df),
                    file_name='MLB_player_frequency.csv',
                    mime='text/csv',
                )

        with tab2:
            if 'working_seed' in st.session_state:
                player_columns = st.session_state.working_seed[:, :-STAT_COLS]
                summary_df = _player_frequency_table(player_columns, len(st.session_state.working_seed), player_salaries)

                st.write("Seed Frame Frequency Table:")
                st.dataframe(summary_df.style.format({'Percentage': '{:.2f}%'}), height=500, use_container_width=True)
                st.download_button(
                    label="Export seed frame frequency",
                    data=convert_df_to_csv(summary_df),
                    file_name='MLB_seed_frame_frequency.csv',
                    mime='text/csv',
                )