James McCool
Fix case sensitivity in database collection names for Draftkings and Fanduel in app.py
7094bdb
import streamlit as st | |
st.set_page_config(layout="wide") | |
import numpy as np | |
import pandas as pd | |
import pymongo | |
@st.cache_resource
def init_conn():
    """Open the MongoDB connection and return the ``MLB_Database`` handle.

    The connection URI is read from the Streamlit secret ``mongo_uri``.
    The function is wrapped in ``st.cache_resource`` so that script reruns
    reuse one ``MongoClient`` instead of constructing a new client on every
    widget interaction.

    Returns:
        The ``MLB_Database`` database object of the connected client.
    """
    uri = st.secrets['mongo_uri']
    client = pymongo.MongoClient(uri, retryWrites=True, serverSelectionTimeoutMS=500000)
    return client["MLB_Database"]


# Module-level database handle used by every loader below.
db = init_conn()
# Styler format strings: each listed column is rendered as a percentage
# with two decimals.
percentages_format = {'Exposure': '{:.2%}'}
freq_format = {**percentages_format, 'Proj Own': '{:.2%}', 'Edge': '{:.2%}'}

# Both sites share the same trailing lineup columns; only the roster slots
# in front of them differ.
_lineup_summary_columns = ['salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']
dk_columns = ['SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3'] + _lineup_summary_columns
fd_columns = ['P', 'C_1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3', 'UTIL'] + _lineup_summary_columns
def init_DK_seed_frames(sharp_split):
    """Load seed lineups from ``DK_MLB_seed_frame`` as a NumPy array.

    Args:
        sharp_split: Row limit applied to the seed-frame cursor.

    Returns:
        NumPy array with one row per lineup. Columns are the ten roster
        slots followed by salary, proj, Team, Team_count, Secondary,
        Secondary_count and Own. Roster values are translated through the
        key -> value pairs stored in ``DK_MLB_name_map``.
    """
    roster_columns = ['SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3']
    lineup_columns = roster_columns + ['salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']

    name_map_frame = pd.DataFrame(list(db['DK_MLB_name_map'].find()))
    names_dict = dict(zip(name_map_frame['key'], name_map_frame['value']))

    seed_cursor = db["DK_MLB_seed_frame"].find().limit(sharp_split)
    # Take an explicit copy so the roster columns are rewritten on an
    # independent frame rather than on a selection of the raw result.
    raw_display = pd.DataFrame(list(seed_cursor))[lineup_columns].copy()

    # Map names
    raw_display[roster_columns] = raw_display[roster_columns].apply(lambda col: col.map(names_dict))
    return raw_display.to_numpy()
def init_DK_secondary_seed_frames(sharp_split):
    """Load seed lineups from ``DK_MLB_Secondary_seed_frame`` as a NumPy array.

    Args:
        sharp_split: Row limit applied to the seed-frame cursor.

    Returns:
        NumPy array with one row per lineup. Columns are the ten roster
        slots followed by salary, proj, Team, Team_count, Secondary,
        Secondary_count and Own. Roster values are translated through the
        key -> value pairs stored in ``DK_MLB_Secondary_name_map``.
    """
    roster_columns = ['SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3']
    lineup_columns = roster_columns + ['salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']

    name_map_frame = pd.DataFrame(list(db['DK_MLB_Secondary_name_map'].find()))
    names_dict = dict(zip(name_map_frame['key'], name_map_frame['value']))

    seed_cursor = db["DK_MLB_Secondary_seed_frame"].find().limit(sharp_split)
    # Take an explicit copy so the roster columns are rewritten on an
    # independent frame rather than on a selection of the raw result.
    raw_display = pd.DataFrame(list(seed_cursor))[lineup_columns].copy()

    # Map names
    raw_display[roster_columns] = raw_display[roster_columns].apply(lambda col: col.map(names_dict))
    return raw_display.to_numpy()
def init_DK_auxiliary_seed_frames(sharp_split):
    """Load seed lineups from ``DK_MLB_Turbo_seed_frame`` as a NumPy array.

    Args:
        sharp_split: Row limit applied to the seed-frame cursor.

    Returns:
        NumPy array with one row per lineup. Columns are the ten roster
        slots followed by salary, proj, Team, Team_count, Secondary,
        Secondary_count and Own. Roster values are translated through the
        key -> value pairs stored in ``DK_MLB_Turbo_name_map``.
    """
    roster_columns = ['SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3']
    lineup_columns = roster_columns + ['salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']

    name_map_frame = pd.DataFrame(list(db['DK_MLB_Turbo_name_map'].find()))
    names_dict = dict(zip(name_map_frame['key'], name_map_frame['value']))

    seed_cursor = db["DK_MLB_Turbo_seed_frame"].find().limit(sharp_split)
    # Take an explicit copy so the roster columns are rewritten on an
    # independent frame rather than on a selection of the raw result.
    raw_display = pd.DataFrame(list(seed_cursor))[lineup_columns].copy()

    # Map names
    raw_display[roster_columns] = raw_display[roster_columns].apply(lambda col: col.map(names_dict))
    return raw_display.to_numpy()
def init_FD_seed_frames(sharp_split):
    """Load seed lineups from ``FD_MLB_seed_frame`` as a NumPy array.

    Args:
        sharp_split: Row limit applied to the seed-frame cursor.

    Returns:
        NumPy array with one row per lineup. Columns are the nine roster
        slots followed by salary, proj, Team, Team_count, Secondary,
        Secondary_count and Own. Roster values are translated through the
        key -> value pairs stored in ``FD_MLB_name_map``.
    """
    roster_columns = ['P', 'C_1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3', 'UTIL']
    lineup_columns = roster_columns + ['salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']

    name_map_frame = pd.DataFrame(list(db['FD_MLB_name_map'].find()))
    names_dict = dict(zip(name_map_frame['key'], name_map_frame['value']))

    seed_cursor = db["FD_MLB_seed_frame"].find().limit(sharp_split)
    # Take an explicit copy so the roster columns are rewritten on an
    # independent frame rather than on a selection of the raw result.
    raw_display = pd.DataFrame(list(seed_cursor))[lineup_columns].copy()

    # Map names
    raw_display[roster_columns] = raw_display[roster_columns].apply(lambda col: col.map(names_dict))
    return raw_display.to_numpy()
def init_FD_secondary_seed_frames(sharp_split):
    """Load seed lineups from ``FD_MLB_Secondary_seed_frame`` as a NumPy array.

    Args:
        sharp_split: Row limit applied to the seed-frame cursor.

    Returns:
        NumPy array with one row per lineup. Columns are the nine roster
        slots followed by salary, proj, Team, Team_count, Secondary,
        Secondary_count and Own. Roster values are translated through the
        key -> value pairs stored in ``FD_MLB_Secondary_name_map``.
    """
    roster_columns = ['P', 'C_1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3', 'UTIL']
    lineup_columns = roster_columns + ['salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']

    name_map_frame = pd.DataFrame(list(db['FD_MLB_Secondary_name_map'].find()))
    names_dict = dict(zip(name_map_frame['key'], name_map_frame['value']))

    seed_cursor = db["FD_MLB_Secondary_seed_frame"].find().limit(sharp_split)
    # Take an explicit copy so the roster columns are rewritten on an
    # independent frame rather than on a selection of the raw result.
    raw_display = pd.DataFrame(list(seed_cursor))[lineup_columns].copy()

    # Map names
    raw_display[roster_columns] = raw_display[roster_columns].apply(lambda col: col.map(names_dict))
    return raw_display.to_numpy()
def init_FD_auxiliary_seed_frames(sharp_split):
    """Load seed lineups from ``FD_MLB_Turbo_seed_frame`` as a NumPy array.

    Args:
        sharp_split: Row limit applied to the seed-frame cursor.

    Returns:
        NumPy array with one row per lineup. Columns are the nine roster
        slots followed by salary, proj, Team, Team_count, Secondary,
        Secondary_count and Own. Roster values are translated through the
        key -> value pairs stored in ``FD_MLB_Turbo_name_map``.
    """
    roster_columns = ['P', 'C_1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3', 'UTIL']
    lineup_columns = roster_columns + ['salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']

    name_map_frame = pd.DataFrame(list(db['FD_MLB_Turbo_name_map'].find()))
    names_dict = dict(zip(name_map_frame['key'], name_map_frame['value']))

    seed_cursor = db["FD_MLB_Turbo_seed_frame"].find().limit(sharp_split)
    # Take an explicit copy so the roster columns are rewritten on an
    # independent frame rather than on a selection of the raw result.
    raw_display = pd.DataFrame(list(seed_cursor))[lineup_columns].copy()

    # Map names
    raw_display[roster_columns] = raw_display[roster_columns].apply(lambda col: col.map(names_dict))
    return raw_display.to_numpy()
def init_baselines():
    """Load ``Player_Range_Of_Outcomes`` and split it by site and slate.

    Empty strings are replaced with NaN, the columns ``Fantasy``, ``Name``
    and ``player_ID`` are renamed to ``Median``, ``Player`` and
    ``player_id``, and only rows with ``Median > 0`` are kept. Each
    site/slate frame gains an ``STDev`` column equal to ``Median / 3`` and
    has ``Own%`` renamed to ``Own``.

    Returns:
        Tuple ``(dk_raw, fd_raw, dk_secondary, fd_secondary, dk_auxiliary,
        fd_auxiliary, teams_playing_count)``. The six frames cover the
        ``main_slate``, ``secondary_slate`` and ``turbo_slate`` rows for
        the ``Draftkings`` and ``Fanduel`` sites; ``teams_playing_count``
        is the number of distinct ``Team`` values in ``dk_raw``.
    """
    load_display = pd.DataFrame(list(db["Player_Range_Of_Outcomes"].find()))
    load_display.replace('', np.nan, inplace=True)
    load_display.rename(columns={"Fantasy": "Median", 'Name': 'Player', 'player_ID': 'player_id'}, inplace=True)
    load_display = load_display[load_display['Median'] > 0]

    def _slate_frame(site, slate):
        # Work on an explicit copy so the STDev column is added to an
        # independent frame, not to a filtered selection of load_display.
        matches = (load_display['Site'] == site) & (load_display['Slate'] == slate)
        selected = load_display[matches].copy()
        selected['STDev'] = selected['Median'] / 3
        return selected.dropna(subset=['Median']).rename(columns={'Own%': 'Own'})

    dk_raw = _slate_frame('Draftkings', 'main_slate')
    fd_raw = _slate_frame('Fanduel', 'main_slate')
    dk_secondary = _slate_frame('Draftkings', 'secondary_slate')
    fd_secondary = _slate_frame('Fanduel', 'secondary_slate')
    dk_auxiliary = _slate_frame('Draftkings', 'turbo_slate')
    fd_auxiliary = _slate_frame('Fanduel', 'turbo_slate')

    teams_playing_count = len(dk_raw.Team.unique())

    return dk_raw, fd_raw, dk_secondary, fd_secondary, dk_auxiliary, fd_auxiliary, teams_playing_count
def validate_lineup_players(df, valid_players, player_columns):
    """
    Keep only the lineups whose players are all known.

    Args:
        df: DataFrame containing lineups
        valid_players: Set of valid player names
        player_columns: List of columns containing player names

    Returns:
        The rows of df for which every value in player_columns is a member
        of valid_players
    """
    is_known = df[player_columns].isin(valid_players)
    keep_mask = is_known.all(axis=1)
    return df.loc[keep_mask]
def convert_df(array):
    """Return *array* as UTF-8 encoded CSV bytes.

    The data is labelled with the module-level ``column_names`` before it
    is serialised with ``DataFrame.to_csv``.
    """
    labelled = pd.DataFrame(array, columns=column_names)
    csv_text = labelled.to_csv()
    return csv_text.encode('utf-8')
def calculate_DK_value_frequencies(np_array):
    """Return each distinct value of the first ten columns with its frequency.

    The frequency is the number of occurrences divided by the number of
    rows in ``np_array``. The result has two columns: value, frequency.
    """
    roster_slots = np_array[:, :10]
    distinct_values, occurrences = np.unique(roster_slots, return_counts=True)
    # Normalize by the number of rows
    row_share = occurrences / np_array.shape[0]
    return np.column_stack((distinct_values, row_share))
def calculate_FD_value_frequencies(np_array):
    """Return each distinct value of the first nine columns with its frequency.

    The frequency is the number of occurrences divided by the number of
    rows in ``np_array``. The result has two columns: value, frequency.
    """
    roster_slots = np_array[:, :9]
    distinct_values, occurrences = np.unique(roster_slots, return_counts=True)
    # Normalize by the number of rows
    row_share = occurrences / np_array.shape[0]
    return np.column_stack((distinct_values, row_share))
def sim_contest(Sim_size, seed_frame, maps_dict, Contest_Size, teams_playing_count, site):
    """Simulate contests drawn from a seed frame and collect each winner.

    For every simulation, ``Contest_Size`` rows are drawn (with replacement)
    from ``seed_frame``. A score is sampled for every roster slot from a
    normal distribution whose mean and standard deviation come from
    ``maps_dict['Projection_map']`` and ``maps_dict['STDev_map']``, both
    scaled by a per-lineup stack multiplier, and the slot scores are summed
    per lineup. The lineup with the highest total is recorded.

    Args:
        Sim_size: Number of contests to simulate.
        seed_frame: 2-D NumPy array of lineups; the last seven columns are
            treated as non-roster columns.
        maps_dict: Dict holding at least ``Projection_map`` and
            ``STDev_map``, each keyed by the values found in the roster
            columns.
        Contest_Size: Number of lineups drawn per simulated contest.
        teams_playing_count: Team count used to size the stack bonus.
        site: ``'Draftkings'`` or ``'Fanduel'``; selects which column
            indices are read for stack size and for the pre-sort.

    Returns:
        List with one array per simulation, each holding the single winning
        row with its simulated total appended as the last column.

    Raises:
        ValueError: If ``site`` is not one of the two supported values.
    """
    # (stack-size column, pre-sort column) indices for each site's layout.
    site_columns = {'Draftkings': (13, 10), 'Fanduel': (12, 9)}
    if site not in site_columns:
        raise ValueError(f"Unsupported site: {site!r}")
    stack_col, sort_col = site_columns[site]

    fp_array = seed_frame.copy()

    # Pre-vectorize functions
    vec_projection_map = np.vectorize(maps_dict['Projection_map'].__getitem__)
    vec_stdev_map = np.vectorize(maps_dict['STDev_map'].__getitem__)

    # The bonus sizes depend only on the team count, so compute them once.
    four_stack_bonus = 0.025 * (teams_playing_count - 8)
    five_stack_bonus = 0.025 * (teams_playing_count - 12)

    st.write('Simulating contest on frames')

    Sim_Winners = []
    for _ in range(int(Sim_size)):
        fp_random = fp_array[np.random.choice(fp_array.shape[0], Contest_Size)]

        # Calculate stack multipliers first (start with no bonus).
        stack_sizes = fp_random[:, stack_col]
        stack_multiplier = np.ones(fp_random.shape[0])
        stack_multiplier += np.minimum(0.10, np.where(stack_sizes == 4, four_stack_bonus, 0))
        stack_multiplier += np.minimum(0.15, np.where(stack_sizes >= 5, five_stack_bonus, 0))

        # Apply multipliers to both loc and scale in the normal distribution
        roster = fp_random[:, :-7]
        scale_by_lineup = stack_multiplier[:, np.newaxis]
        final_projections = np.sum(
            np.random.normal(
                loc=vec_projection_map(roster) * scale_by_lineup,
                scale=vec_stdev_map(roster) * scale_by_lineup,
            ),
            axis=1,
        )

        sample_arrays = np.c_[fp_random, final_projections]
        # Pre-sort descending on the site's sort column, then take the top
        # simulated total with a stable sort.
        final_array = sample_arrays[sample_arrays[:, sort_col].argsort()[::-1]]
        best_lineup = final_array[final_array[:, -1].argsort(kind='stable')[::-1][:1]]
        Sim_Winners.append(best_lineup)

    return Sim_Winners
def _count_occurrences(values):
    """Return a Player/Freq table of the distinct entries in *values*, most frequent first."""
    counted = pd.DataFrame(
        np.column_stack(np.unique(values, return_counts=True)),
        columns=['Player', 'Freq'],
    ).sort_values('Freq', ascending=False).reset_index(drop=True)
    counted['Freq'] = counted['Freq'].astype(int)
    return counted


def _player_exposures(values, maps_dict, sim_count):
    """Build the per-player exposure table (position, salary, ownership, exposure, edge, team)."""
    table = _count_occurrences(values)
    table['Position'] = table['Player'].map(maps_dict['Pos_map'])
    table['Salary'] = table['Player'].map(maps_dict['Salary_map'])
    table['Proj Own'] = table['Player'].map(maps_dict['Own_map']) / 100
    table['Exposure'] = table['Freq'] / sim_count
    table['Edge'] = table['Exposure'] - table['Proj Own']
    table['Team'] = table['Player'].map(maps_dict['Team_map'])
    return table


def _group_exposures(values, sim_count):
    """Build a Player/Freq/Exposure table for team or stack-size values."""
    table = _count_occurrences(values)
    table['Exposure'] = table['Freq'] / sim_count
    return table


dk_raw, fd_raw, dk_secondary, fd_secondary, dk_auxiliary, fd_auxiliary, teams_playing_count = init_baselines()
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))

# These two names are reused by the top-level `with tab2:` section further
# down, so nothing inside this block may rebind them.
tab1, tab2 = st.tabs(['Contest Sims', 'Data Export'])

with tab1:
    with st.expander("Info and Filters"):
        if st.button("Load/Reset Data", key='reset2'):
            st.cache_data.clear()
            for key in st.session_state.keys():
                del st.session_state[key]
            DK_seed = init_DK_seed_frames(10000)
            FD_seed = init_FD_seed_frames(10000)
            dk_raw, fd_raw, dk_secondary, fd_secondary, dk_auxiliary, fd_auxiliary, teams_playing_count = init_baselines()
            dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
            fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))

        sim_slate_var1 = st.radio("Which data are you loading?", ('Main Slate', 'Secondary Slate', 'Auxiliary Slate'), key='sim_slate_var1')
        sim_site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'), key='sim_site_var1')

        contest_var1 = st.selectbox("What contest size are you simulating?", ('Small', 'Medium', 'Large', 'Custom'))
        if contest_var1 == 'Custom':
            Contest_Size = st.number_input("Insert contest size", value=100, placeholder="Type a number under 10,000...")
        else:
            Contest_Size = {'Small': 1000, 'Medium': 5000, 'Large': 10000}[contest_var1]

        strength_var1 = st.selectbox("How sharp is the field in the contest?", ('Very', 'Above Average', 'Average', 'Below Average', 'Not Very'))
        # The value is the row limit passed to the seed-frame loaders.
        sharp_split = {
            'Not Very': 500000,
            'Below Average': 250000,
            'Average': 100000,
            'Above Average': 50000,
            'Very': 10000,
        }[strength_var1]

    if st.button("Run Contest Sim"):
        sim_count = 1000

        # Resolve everything that depends on the current selection on every
        # run. These are in-memory lookups, so a cached seed never leaves
        # raw_baselines / column_names undefined.
        slate_sources = {
            ('Draftkings', 'Main Slate'): (init_DK_seed_frames, dk_raw),
            ('Draftkings', 'Secondary Slate'): (init_DK_secondary_seed_frames, dk_secondary),
            ('Draftkings', 'Auxiliary Slate'): (init_DK_auxiliary_seed_frames, dk_auxiliary),
            ('Fanduel', 'Main Slate'): (init_FD_seed_frames, fd_raw),
            ('Fanduel', 'Secondary Slate'): (init_FD_secondary_seed_frames, fd_secondary),
            ('Fanduel', 'Auxiliary Slate'): (init_FD_auxiliary_seed_frames, fd_auxiliary),
        }
        seed_loader, raw_baselines = slate_sources[(sim_site_var1, sim_slate_var1)]
        id_dict = dict(zip(raw_baselines.Player, raw_baselines.player_id))
        if sim_site_var1 == 'Draftkings':
            column_names = dk_columns
            roster_size, sp_slots = 10, 2
            dk_id_dict = id_dict
        else:
            column_names = fd_columns
            roster_size, sp_slots = 9, 1
            fd_id_dict = id_dict
        # Team and Team_count sit two and three columns after the roster slots.
        team_idx = roster_size + 2
        stack_idx = roster_size + 3

        # Reuse the cached seed only if it was loaded for this exact selection.
        seed_key = (sim_site_var1, sim_slate_var1, sharp_split)
        if 'working_seed' not in st.session_state or st.session_state.get('working_seed_key') != seed_key:
            st.session_state.working_seed = seed_loader(sharp_split)
            st.session_state.working_seed_key = seed_key

        st.session_state.maps_dict = {
            'Projection_map': dict(zip(raw_baselines.Player, raw_baselines.Median)),
            'Salary_map': dict(zip(raw_baselines.Player, raw_baselines.Salary)),
            'Pos_map': dict(zip(raw_baselines.Player, raw_baselines.Position)),
            'Own_map': dict(zip(raw_baselines.Player, raw_baselines['Own'])),
            'Team_map': dict(zip(raw_baselines.Player, raw_baselines.Team)),
            'STDev_map': dict(zip(raw_baselines.Player, raw_baselines.STDev)),
        }
        Sim_Winners = sim_contest(sim_count, st.session_state.working_seed, st.session_state.maps_dict, Contest_Size, teams_playing_count, sim_site_var1)

        # Initial setup
        Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners), columns=column_names + ['Fantasy'])
        Sim_Winner_Frame['GPP_Proj'] = (Sim_Winner_Frame['proj'] + Sim_Winner_Frame['Fantasy']) / 2
        Sim_Winner_Frame['unique_id'] = Sim_Winner_Frame['proj'].astype(str) + Sim_Winner_Frame['salary'].astype(str) + Sim_Winner_Frame['Team'].astype(str) + Sim_Winner_Frame['Secondary'].astype(str)
        Sim_Winner_Frame = Sim_Winner_Frame.assign(win_count=Sim_Winner_Frame['unique_id'].map(Sim_Winner_Frame['unique_id'].value_counts()))

        # Type Casting
        type_cast_dict = {'salary': int, 'proj': np.float16, 'Fantasy': np.float16, 'GPP_Proj': np.float32, 'Own': np.float32}
        Sim_Winner_Frame = Sim_Winner_Frame.astype(type_cast_dict)

        # Sorting: keep the 100 most frequent distinct winners.
        st.session_state.Sim_Winner_Frame = (
            Sim_Winner_Frame
            .sort_values(by=['win_count', 'GPP_Proj'], ascending=[False, False])
            .drop_duplicates(subset='unique_id')
            .head(100)
            .drop(columns='unique_id')
        )

        # Export copy: translate only the roster slots, using the id map of
        # the selected site and slate.
        roster_columns = column_names[:roster_size]
        export_frame = Sim_Winner_Frame.copy()
        export_frame[roster_columns] = export_frame[roster_columns].apply(lambda col: col.map(id_dict))
        st.session_state.Sim_Winner_Export = export_frame.drop_duplicates(subset=['Team', 'Secondary', 'salary', 'unique_id'])

        # Display copy, also the source for the frequency tables below.
        st.session_state.Sim_Winner_Display = Sim_Winner_Frame.copy()
        st.session_state.freq_copy = st.session_state.Sim_Winner_Display

        freq_source = st.session_state.freq_copy
        st.session_state.player_freq = _player_exposures(freq_source.iloc[:, 0:roster_size].values, st.session_state.maps_dict, sim_count)
        st.session_state.sp_freq = _player_exposures(freq_source.iloc[:, 0:sp_slots].values, st.session_state.maps_dict, sim_count)
        st.session_state.team_freq = _group_exposures(freq_source.iloc[:, team_idx:team_idx + 1].values, sim_count)
        st.session_state.stack_freq = _group_exposures(freq_source.iloc[:, stack_idx:stack_idx + 1].values, sim_count)

    with st.container():
        if st.button("Reset Sim", key='reset_sim'):
            for key in st.session_state.keys():
                del st.session_state[key]

        if 'player_freq' in st.session_state:
            player_split_var2 = st.radio("Are you wanting to isolate any lineups with specific players?", ('Full Players', 'Specific Players'), key='player_split_var2')
            if player_split_var2 == 'Specific Players':
                find_var2 = st.multiselect('Which players must be included in the lineups?', options = st.session_state.player_freq['Player'].unique())
                # Keep rows in which every selected player appears in some column.
                st.session_state.Sim_Winner_Display = st.session_state.Sim_Winner_Frame[np.equal.outer(st.session_state.Sim_Winner_Frame.to_numpy(), find_var2).any(axis=1).all(axis=1)]
            elif player_split_var2 == 'Full Players':
                find_var2 = st.session_state.player_freq.Player.values.tolist()
                st.session_state.Sim_Winner_Display = st.session_state.Sim_Winner_Frame

        if 'Sim_Winner_Display' in st.session_state:
            st.dataframe(st.session_state.Sim_Winner_Display.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(precision=2), use_container_width = True)

        if 'Sim_Winner_Export' in st.session_state:
            st.download_button(
                label="Export Full Frame",
                data=st.session_state.Sim_Winner_Export.to_csv().encode('utf-8'),
                file_name='MLB_consim_export.csv',
                mime='text/csv',
            )

        # Distinct names here: rebinding tab1/tab2 would redirect the later
        # top-level `with tab2:` section into one of these inner tabs.
        frame_stats_tab, stack_stats_tab = st.tabs(['Winning Frame Statistics', 'Stack Type Statistics'])

        with frame_stats_tab:
            if 'Sim_Winner_Display' in st.session_state:
                # Create a new dataframe with summary statistics
                shown = st.session_state.Sim_Winner_Display
                summary_sources = {'Salary': 'salary', 'Proj': 'proj', 'Own': 'Own', 'Fantasy': 'Fantasy', 'GPP_Proj': 'GPP_Proj'}
                summary_data = {'Metric': ['Min', 'Average', 'Max', 'STDdev']}
                for label, source_col in summary_sources.items():
                    summary_data[label] = [
                        shown[source_col].min(),
                        shown[source_col].mean(),
                        shown[source_col].max(),
                        shown[source_col].std(),
                    ]
                # Set the index of the summary dataframe as the "Metric" column
                summary_df = pd.DataFrame(summary_data).set_index('Metric')

                # Display the summary dataframe
                st.subheader("Winning Frame Statistics")
                st.dataframe(summary_df.style.format({
                    'Salary': '{:.2f}',
                    'Proj': '{:.2f}',
                    'Own': '{:.2f}',
                    'Fantasy': '{:.2f}',
                    'GPP_Proj': '{:.2f}'
                }).background_gradient(cmap='RdYlGn', axis=0, subset=['Salary', 'Proj', 'Own', 'Fantasy', 'GPP_Proj']), use_container_width=True)

        with stack_stats_tab:
            if 'Sim_Winner_Display' in st.session_state:
                # Number of winning lineups per stack size
                stack_counts = st.session_state.freq_copy['Team_count'].value_counts()

                # Calculate average statistics for each stack size
                stack_stats = st.session_state.freq_copy.groupby('Team_count').agg({
                    'proj': 'mean',
                    'Own': 'mean',
                    'Fantasy': 'mean',
                    'GPP_Proj': 'mean'
                })

                # Combine counts and average statistics
                stack_summary = pd.concat([stack_counts, stack_stats], axis=1)
                stack_summary.columns = ['Count', 'Avg Proj', 'Avg Own', 'Avg Fantasy', 'Avg GPP_Proj']
                stack_summary = stack_summary.reset_index()
                stack_summary.columns = ['Stack Size', 'Count', 'Avg Proj', 'Avg Own', 'Avg Fantasy', 'Avg GPP_Proj']
                stack_summary = stack_summary.sort_values(by='Stack Size', ascending=True)
                stack_summary = stack_summary.set_index('Stack Size')

                # Display the summary dataframe
                st.subheader("Stack Type Statistics")
                st.dataframe(stack_summary.style.format({
                    'Count': '{:.0f}',
                    'Avg Proj': '{:.2f}',
                    'Avg Own': '{:.2f}',
                    'Avg Fantasy': '{:.2f}',
                    'Avg GPP_Proj': '{:.2f}'
                }).background_gradient(cmap='RdYlGn', axis=0, subset=['Count', 'Avg Proj', 'Avg Own', 'Avg Fantasy', 'Avg GPP_Proj']), use_container_width=True)
            else:
                st.write("Simulation data or position mapping not available.")

    with st.container():
        exposure_tabs = st.tabs(['Overall Exposures', 'SP Exposures', 'Team Exposures', 'Stack Size Exposures'])
        # (session-state key, Styler format, download file name, widget key) per tab.
        exposure_views = [
            ('player_freq', freq_format, 'player_freq_export.csv', 'overall'),
            ('sp_freq', freq_format, 'sp_freq.csv', 'sp'),
            ('team_freq', percentages_format, 'team_freq.csv', 'team'),
            ('stack_freq', percentages_format, 'stack_freq.csv', 'stack'),
        ]
        for exposure_tab, (state_key, table_format, export_name, widget_key) in zip(exposure_tabs, exposure_views):
            with exposure_tab:
                if state_key in st.session_state:
                    exposure_table = st.session_state[state_key]
                    st.dataframe(exposure_table.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(table_format, precision=2), use_container_width = True)
                    st.download_button(
                        label="Export Exposures",
                        data=exposure_table.to_csv().encode('utf-8'),
                        file_name=export_name,
                        mime='text/csv',
                        key=widget_key
                    )
with tab2: | |
with st.expander("Info and Filters"): | |
if st.button("Load/Reset Data", key='reset1'):
    # Drop cached data and every session value, then reload the seeds and
    # baselines.
    st.cache_data.clear()
    for key in st.session_state.keys():
        del st.session_state[key]
    DK_seed = init_DK_seed_frames(10000)
    FD_seed = init_FD_seed_frames(10000)
    # init_baselines() returns seven values; unpacking only five of them
    # raises ValueError, so all seven are bound here.
    dk_raw, fd_raw, dk_secondary, fd_secondary, dk_auxiliary, fd_auxiliary, teams_playing_count = init_baselines()
    dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
    fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
slate_var1 = st.radio("Which data are you loading?", ('Main Slate', 'Secondary Slate', 'Auxiliary Slate')) | |
site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel')) | |
sharp_split_var = st.number_input("How many lineups do you want?", value=10000, max_value=500000, min_value=10000, step=10000) | |
lineup_num_var = st.number_input("How many lineups do you want to display?", min_value=1, max_value=500, value=10, step=1) | |
if site_var1 == 'Draftkings': | |
team_var1 = st.radio("Do you want a frame with specific teams?", ('Full Slate', 'Specific Teams'), key='team_var1') | |
if team_var1 == 'Specific Teams': | |
team_var2 = st.multiselect('Which teams do you want?', options = dk_raw['Team'].unique()) | |
elif team_var1 == 'Full Slate': | |
team_var2 = dk_raw.Team.values.tolist() | |
stack_var1 = st.radio("Do you want a frame with specific stack sizes?", ('Full Slate', 'Specific Stack Sizes'), key='stack_var1') | |
if stack_var1 == 'Specific Stack Sizes': | |
stack_var2 = st.multiselect('Which stack sizes do you want?', options = [5, 4, 3, 2, 1, 0]) | |
elif stack_var1 == 'Full Slate': | |
stack_var2 = [5, 4, 3, 2, 1, 0] | |
raw_baselines = dk_raw | |
column_names = dk_columns | |
elif site_var1 == 'Fanduel': | |
team_var1 = st.radio("Do you want a frame with specific teams?", ('Full Slate', 'Specific Teams'), key='team_var1') | |
if team_var1 == 'Specific Teams': | |
team_var2 = st.multiselect('Which teams do you want?', options = fd_raw['Team'].unique()) | |
elif team_var1 == 'Full Slate': | |
team_var2 = fd_raw.Team.values.tolist() | |
stack_var1 = st.radio("Do you want a frame with specific stack sizes?", ('Full Slate', 'Specific Stack Sizes'), key='stack_var1') | |
if stack_var1 == 'Specific Stack Sizes': | |
stack_var2 = st.multiselect('Which stack sizes do you want?', options = [5, 4, 3, 2, 1, 0]) | |
elif stack_var1 == 'Full Slate': | |
stack_var2 = [5, 4, 3, 2, 1, 0] | |
raw_baselines = fd_raw | |
column_names = fd_columns | |
# "Prepare data export" handler: filter the working seed array by the selected
# teams and stack sizes, then offer the result as a CSV download.
# NOTE(review): leading whitespace was lost in this copy of the file, so the
# branch nesting described below is inferred from statement order -- confirm.
if st.button("Prepare data export", key='data_export'): | |
# A seed array is already in session state: filter it. Each filter overwrites
# st.session_state.working_seed, so rows dropped here stay dropped on later
# clicks until the state is cleared.
if 'working_seed' in st.session_state: | |
if site_var1 == 'Draftkings': | |
# Columns 12 / 13 are 'Team' / 'Team_count' in dk_columns.
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 12], team_var2)] | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 13], stack_var2)] | |
elif site_var1 == 'Fanduel': | |
# Columns 11 / 12 are 'Team' / 'Team_count' in fd_columns.
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 11], team_var2)] | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 12], stack_var2)] | |
# The preview is stored as a raw array slice here, not wrapped in a DataFrame.
st.session_state.data_export_display = st.session_state.working_seed[0:lineup_num_var] | |
# No seed array yet: load the one matching the chosen site and slate first.
# The id-dict, raw_baselines and column_names assignments in these branches are
# not read again by this handler's visible code.
elif 'working_seed' not in st.session_state: | |
if site_var1 == 'Draftkings': | |
if slate_var1 == 'Main Slate': | |
st.session_state.working_seed = init_DK_seed_frames(sharp_split_var) | |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id)) | |
raw_baselines = dk_raw | |
column_names = dk_columns | |
elif slate_var1 == 'Secondary Slate': | |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var) | |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id)) | |
raw_baselines = dk_secondary | |
column_names = dk_columns | |
elif slate_var1 == 'Auxiliary Slate': | |
st.session_state.working_seed = init_DK_auxiliary_seed_frames(sharp_split_var) | |
# NOTE(review): dk_auxiliary is not among the names unpacked from
# init_baselines() in the reset handler; confirm it is defined elsewhere,
# otherwise this branch raises NameError.
dk_id_dict = dict(zip(dk_auxiliary.Player, dk_auxiliary.player_id)) | |
raw_baselines = dk_auxiliary | |
column_names = dk_columns | |
elif site_var1 == 'Fanduel': | |
if slate_var1 == 'Main Slate': | |
st.session_state.working_seed = init_FD_seed_frames(sharp_split_var) | |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id)) | |
raw_baselines = fd_raw | |
column_names = fd_columns | |
elif slate_var1 == 'Secondary Slate': | |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var) | |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id)) | |
raw_baselines = fd_secondary | |
column_names = fd_columns | |
elif slate_var1 == 'Auxiliary Slate': | |
st.session_state.working_seed = init_FD_auxiliary_seed_frames(sharp_split_var) | |
# NOTE(review): fd_auxiliary is not among the names unpacked from
# init_baselines() in the reset handler; confirm it is defined elsewhere.
fd_id_dict = dict(zip(fd_auxiliary.Player, fd_auxiliary.player_id)) | |
raw_baselines = fd_auxiliary | |
column_names = fd_columns | |
# Apply the same team / stack-size filters to the freshly loaded array
# (same column indices as in the already-loaded branch above).
if site_var1 == 'Draftkings': | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 12], team_var2)] | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 13], stack_var2)] | |
elif site_var1 == 'Fanduel': | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 11], team_var2)] | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 12], stack_var2)] | |
st.session_state.data_export_display = st.session_state.working_seed[0:lineup_num_var] | |
# Export a copy so the download payload is independent of the session array.
data_export = st.session_state.working_seed.copy() | |
# convert_df is defined outside this view; presumably it returns CSV bytes to
# match the 'text/csv' mime type -- confirm.
st.download_button( | |
label="Export optimals set", | |
data=convert_df(data_export), | |
file_name='MLB_optimals_export.csv', | |
mime='text/csv', | |
) | |
# Delete every session_state key (working_seed and data_export_display included)
# once the download button has been created.
for key in st.session_state.keys(): | |
del st.session_state[key] | |
# "Load Data" handler: make sure a working seed array exists for the chosen site
# and slate, filter it by the selected teams and stack sizes, and store the first
# lineup_num_var rows as a DataFrame for the preview table.
# NOTE(review): leading whitespace was lost in this copy of the file, so the
# branch nesting described below is inferred from statement order -- confirm.
if st.button("Load Data", key='load_data'): | |
if site_var1 == 'Draftkings': | |
# Seed array already loaded: filter it. Each filter overwrites
# st.session_state.working_seed, so successive clicks narrow it cumulatively.
if 'working_seed' in st.session_state: | |
# NOTE(review): this inner site check appears to sit inside the Draftkings
# branch already, which would make its Fanduel arm unreachable -- confirm.
if site_var1 == 'Draftkings': | |
# Columns 12 / 13 are 'Team' / 'Team_count' in dk_columns.
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 12], team_var2)] | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 13], stack_var2)] | |
elif site_var1 == 'Fanduel': | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 11], team_var2)] | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 12], stack_var2)] | |
# column_names here is whatever the site selection assigned among the filter inputs.
st.session_state.data_export_display = pd.DataFrame(st.session_state.working_seed[0:lineup_num_var], columns=column_names) | |
# No seed array yet: load the Draftkings frame for the chosen slate, then filter.
elif 'working_seed' not in st.session_state: | |
if slate_var1 == 'Main Slate': | |
st.session_state.working_seed = init_DK_seed_frames(sharp_split_var) | |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id)) | |
raw_baselines = dk_raw | |
column_names = dk_columns | |
elif slate_var1 == 'Secondary Slate': | |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var) | |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id)) | |
raw_baselines = dk_secondary | |
column_names = dk_columns | |
elif slate_var1 == 'Auxiliary Slate': | |
st.session_state.working_seed = init_DK_auxiliary_seed_frames(sharp_split_var) | |
# NOTE(review): dk_auxiliary is not among the names unpacked from
# init_baselines() in the reset handler; confirm it is defined elsewhere,
# otherwise this branch raises NameError.
dk_id_dict = dict(zip(dk_auxiliary.Player, dk_auxiliary.player_id)) | |
raw_baselines = dk_auxiliary | |
column_names = dk_columns | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 12], team_var2)] | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 13], stack_var2)] | |
st.session_state.data_export_display = pd.DataFrame(st.session_state.working_seed[0:lineup_num_var], columns=column_names) | |
# Fanduel mirrors the Draftkings flow with fd_* names and the fd_columns layout.
elif site_var1 == 'Fanduel': | |
if 'working_seed' in st.session_state: | |
# Columns 11 / 12 are 'Team' / 'Team_count' in fd_columns.
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 11], team_var2)] | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 12], stack_var2)] | |
st.session_state.data_export_display = pd.DataFrame(st.session_state.working_seed[0:lineup_num_var], columns=column_names) | |
elif 'working_seed' not in st.session_state: | |
if slate_var1 == 'Main Slate': | |
st.session_state.working_seed = init_FD_seed_frames(sharp_split_var) | |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id)) | |
raw_baselines = fd_raw | |
column_names = fd_columns | |
elif slate_var1 == 'Secondary Slate': | |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var) | |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id)) | |
raw_baselines = fd_secondary | |
column_names = fd_columns | |
elif slate_var1 == 'Auxiliary Slate': | |
st.session_state.working_seed = init_FD_auxiliary_seed_frames(sharp_split_var) | |
# NOTE(review): fd_auxiliary is not among the names unpacked from
# init_baselines() in the reset handler; confirm it is defined elsewhere.
fd_id_dict = dict(zip(fd_auxiliary.Player, fd_auxiliary.player_id)) | |
raw_baselines = fd_auxiliary | |
column_names = fd_columns | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 11], team_var2)] | |
st.session_state.working_seed = st.session_state.working_seed[np.isin(st.session_state.working_seed[:, 12], stack_var2)] | |
st.session_state.data_export_display = pd.DataFrame(st.session_state.working_seed[0:lineup_num_var], columns=column_names) | |
with st.container():
    # Show the lineup preview only after a handler has stored one in session state.
    if 'data_export_display' in st.session_state:
        _preview = st.session_state.data_export_display
        _preview_styler = _preview.style.format(freq_format, precision=2)
        st.dataframe(_preview_styler, use_container_width=True)