Custom_ROO_Tool / function_hold /MLB_functions.py
James McCool
Add MLB support to ROO build functions and Streamlit display, including percentage formatting and data upload instructions
95b08b3
raw
history blame
22.7 kB
from numpy import nan as np_nan
from numpy import where as np_where
from numpy import random as np_random
from numpy import zeros as np_zeros
from numpy import array as np_array
from pandas import concat as pd_concat
from pandas import merge as pd_merge
from pandas import DataFrame
def DK_MLB_ROO_Build(projections_file, floor_var, ceiling_var, std_var, distribution_type):
    """Build the DraftKings MLB range-of-outcomes (ROO) table from projections.

    Ownership is rescaled (pitchers to a total of 200, hitters to 800), then
    1000 fantasy-point outcomes are simulated per player and summarised into
    finish rates and salary-multiple hit rates.

    Args:
        projections_file: DataFrame with the columns 'Player', 'Position',
            'Team', 'Opp', 'Salary', 'Median' and 'Own'. Rows whose
            'Position' contains 'P' are treated as pitchers. The frame is
            not modified.
        floor_var: Multiplier applied to 'Median' to get 'Floor'.
        ceiling_var: 'Ceiling' is 'Median' + 5 * ceiling_var.
        std_var: Divisor applied to 'Median' to get the simulation 'STD'.
        distribution_type: 'normal', 'poisson' or 'bimodal'.

    Returns:
        A new DataFrame sorted by 'Median' (descending) with the columns
        Player, Position, Team, Opp, Salary, Floor, Median, Ceiling,
        Top_finish, Top_5_finish, Top_10_finish, 20+%, 2x%, 3x%, 4x%, GPP%,
        Own, Small_Own, Large_Own, Cash_Own.

    Raises:
        ValueError: If distribution_type is not one of the supported names.
    """
    # Validate up front: previously this error was raised inside a bare
    # try/except and swallowed, surfacing later as an unrelated KeyError.
    if distribution_type not in ('normal', 'poisson', 'bimodal'):
        raise ValueError("Invalid distribution type. Must be 'normal', 'poisson', or 'bimodal'")

    total_sims = 1000

    def field_ownership(own, multiplier):
        # Players at or above mean ownership are boosted proportionally to
        # their distance from the mean; everyone is capped at 85.
        mean_own = own.mean()
        boosted = np_where(
            (own - mean_own >= 0),
            own * (multiplier * (own - mean_own) / 100) + mean_own,
            own
        )
        return np_where(boosted > 85, 85, boosted)

    # ---- Ownership -------------------------------------------------------
    # Work on explicit copies so the caller's frame is never written to.
    is_pitcher = projections_file['Position'].str.contains('P')
    sp_frame = projections_file[is_pitcher].copy()
    hit_frame = projections_file[~is_pitcher].copy()
    sp_frame['Own'] = sp_frame['Own'] * (200 / sp_frame['Own'].sum())
    hit_frame['Own'] = hit_frame['Own'] * (800 / hit_frame['Own'].sum())

    basic_own_df = pd_concat([sp_frame, hit_frame])
    basic_own_df['name_team'] = basic_own_df['Player'] + basic_own_df['Position']

    # Normalise the combined pool to 1000 and cap any single player at 90.
    basic_own_df['Own'] = basic_own_df['Own'] * (1000 / basic_own_df['Own'].sum())
    basic_own_df['Own'] = np_where(basic_own_df['Own'] > 90, 90, basic_own_df['Own'])

    # Contest-specific ownership is derived from the capped values.
    # NOTE(review): these are reported as-is (capped at 85, not renormalised
    # to 1000), matching the previous output -- confirm that is intended.
    capped_own = basic_own_df['Own']
    basic_own_df['Small Field Own%'] = field_ownership(capped_own, 6)
    basic_own_df['Large Field Own%'] = field_ownership(capped_own, 2.5)
    basic_own_df['Cash Own%'] = field_ownership(capped_own, 8)

    # Renormalise after the cap so reported 'Own' sums to 1000 again.
    basic_own_df['Own'] = basic_own_df['Own'] * (1000 / basic_own_df['Own'].sum())

    own_dict = dict(zip(basic_own_df.Player, basic_own_df.Own))
    small_own_dict = dict(zip(basic_own_df.Player, basic_own_df['Small Field Own%']))
    large_own_dict = dict(zip(basic_own_df.Player, basic_own_df['Large Field Own%']))
    cash_own_dict = dict(zip(basic_own_df.Player, basic_own_df['Cash Own%']))
    # Team is keyed on Player + Position; everything else on Player alone.
    team_dict = dict(zip(basic_own_df.name_team, basic_own_df.Team))
    opp_dict = dict(zip(basic_own_df.Player, basic_own_df.Opp))

    # ---- Projection inputs ----------------------------------------------
    hold_file = basic_own_df[['Player', 'Position', 'Salary', 'Median']].copy()
    hold_file['Floor'] = (hold_file['Median'] * floor_var)
    hold_file['Ceiling'] = hold_file['Median'] + (5 * ceiling_var)
    hold_file['STD'] = (hold_file['Median'] / std_var)
    hold_file = hold_file[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD']]
    hold_file = hold_file.reset_index(drop=True)

    floor_values = np_array(hold_file['Floor'])
    ceiling_values = np_array(hold_file['Ceiling'])
    median_values = np_array(hold_file['Median'])
    std_values = np_array(hold_file['STD'])

    # ---- Simulation: one column per simulated slate ----------------------
    sim_array = np_zeros((len(hold_file), total_sims))
    for sim in range(total_sims):
        if distribution_type == 'normal':
            draws = np_random.normal(median_values, std_values)
        elif distribution_type == 'poisson':
            # Median is used as the Poisson rate.
            draws = np_random.poisson(median_values)
        else:
            # Bimodal: a single coin flip per simulation centres every
            # player on either their Floor or their Ceiling.
            if np_random.random() < 0.5:
                draws = np_random.normal(floor_values, std_values)
            else:
                draws = np_random.normal(ceiling_values, std_values)
        sim_array[:, sim] = draws

    # ---- Ranks -----------------------------------------------------------
    # Simulations are looked up by player name, so rows sharing a name all
    # use the last such row's draws (unchanged from the previous behaviour).
    players = hold_file['Player'].tolist()
    last_row = {name: row for row, name in enumerate(players)}
    lookup = np_array([last_row[name] for name in players], dtype=int)
    rank_frame = DataFrame(sim_array[lookup]).rank(ascending=False)

    # Salary in thousands, as a column vector that broadcasts across sims.
    salary_k = np_zeros((len(hold_file), 1))
    salary_k[:, 0] = np_array(hold_file['Salary'])
    salary_k = salary_k / 1000

    # Rates are computed from the rank/simulation matrices only; summary
    # columns no longer leak into the Top-N counts.
    final_outcomes = DataFrame({
        'Player': hold_file['Player'],
        'Top_finish': (rank_frame == 1).sum(axis=1) / total_sims,
        'Top_5_finish': (rank_frame <= 5).sum(axis=1) / total_sims,
        'Top_10_finish': (rank_frame <= 10).sum(axis=1) / total_sims,
        '20+%': (sim_array >= 20).sum(axis=1) / float(total_sims),
        '2x%': ((sim_array - (salary_k * 2)) >= 1).sum(axis=1) / float(total_sims),
        '3x%': ((sim_array - (salary_k * 3)) >= 1).sum(axis=1) / float(total_sims),
        '4x%': ((sim_array - (salary_k * 4)) >= 1).sum(axis=1) / float(total_sims),
        'GPP%': ((sim_array - ((salary_k * 5) + 10)) >= 1).sum(axis=1) / float(total_sims),
    })

    # ---- Assemble output -------------------------------------------------
    final_Proj = pd_merge(hold_file, final_outcomes, on="Player")
    final_Proj = final_Proj[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%', 'GPP%']]
    final_Proj['name_team'] = final_Proj['Player'] + final_Proj['Position']
    final_Proj['Own'] = final_Proj['Player'].map(own_dict)
    final_Proj['Small_Own'] = final_Proj['Player'].map(small_own_dict)
    final_Proj['Large_Own'] = final_Proj['Player'].map(large_own_dict)
    final_Proj['Cash_Own'] = final_Proj['Player'].map(cash_own_dict)
    final_Proj['Team'] = final_Proj['name_team'].map(team_dict)
    final_Proj['Opp'] = final_Proj['Player'].map(opp_dict)
    final_Proj = final_Proj[['Player', 'Position', 'Team', 'Opp', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%', 'GPP%',
                             'Own', 'Small_Own', 'Large_Own', 'Cash_Own']]
    final_Proj = final_Proj.sort_values(by='Median', ascending=False)
    return final_Proj.copy()
def FD_MLB_ROO_Build(projections_file, floor_var, ceiling_var, std_var, distribution_type):
    """Build the FanDuel MLB range-of-outcomes (ROO) table from projections.

    Ownership is rescaled (pitchers to a total of 100, hitters to 800), then
    1000 fantasy-point outcomes are simulated per player and summarised into
    finish rates and salary-multiple hit rates.

    Args:
        projections_file: DataFrame with the columns 'Player', 'Position',
            'Team', 'Opp', 'Salary', 'Median' and 'Own'. Rows whose
            'Position' contains 'P' are treated as pitchers. The frame is
            not modified.
        floor_var: Multiplier applied to 'Median' to get 'Floor'.
        ceiling_var: 'Ceiling' is 'Median' + 5 * ceiling_var.
        std_var: Divisor applied to 'Median' to get the simulation 'STD'.
        distribution_type: 'normal', 'poisson' or 'bimodal'.

    Returns:
        A new DataFrame sorted by 'Median' (descending) with the columns
        Player, Position, Team, Opp, Salary (cast to int), Floor, Median,
        Ceiling, Top_finish, Top_5_finish, Top_10_finish, 20+%, 2x%, 3x%,
        4x%, GPP%, Own, Small_Own, Large_Own, Cash_Own.

    Raises:
        ValueError: If distribution_type is not one of the supported names.
    """
    # Validate up front: previously this error was raised inside a bare
    # try/except and swallowed, surfacing later as an unrelated KeyError.
    if distribution_type not in ('normal', 'poisson', 'bimodal'):
        raise ValueError("Invalid distribution type. Must be 'normal', 'poisson', or 'bimodal'")

    total_sims = 1000

    def field_ownership(own, multiplier):
        # Players at or above mean ownership are boosted proportionally to
        # their distance from the mean; everyone is capped at 85.
        mean_own = own.mean()
        boosted = np_where(
            (own - mean_own >= 0),
            own * (multiplier * (own - mean_own) / 100) + mean_own,
            own
        )
        return np_where(boosted > 85, 85, boosted)

    # ---- Ownership -------------------------------------------------------
    # Work on explicit copies so the caller's frame is never written to.
    is_pitcher = projections_file['Position'].str.contains('P')
    sp_frame = projections_file[is_pitcher].copy()
    hit_frame = projections_file[~is_pitcher].copy()
    sp_frame['Own'] = sp_frame['Own'] * (100 / sp_frame['Own'].sum())
    hit_frame['Own'] = hit_frame['Own'] * (800 / hit_frame['Own'].sum())

    basic_own_df = pd_concat([sp_frame, hit_frame])
    basic_own_df['name_team'] = basic_own_df['Player'] + basic_own_df['Position']

    # Normalise the combined pool to 900 and cap any single player at 90.
    basic_own_df['Own'] = basic_own_df['Own'] * (900 / basic_own_df['Own'].sum())
    basic_own_df['Own'] = np_where(basic_own_df['Own'] > 90, 90, basic_own_df['Own'])

    # Contest-specific ownership is derived from the capped values.
    # NOTE(review): these are reported as-is (capped at 85, not renormalised
    # to 900), matching the previous output -- confirm that is intended.
    capped_own = basic_own_df['Own']
    basic_own_df['Small Field Own%'] = field_ownership(capped_own, 6)
    basic_own_df['Large Field Own%'] = field_ownership(capped_own, 2.5)
    basic_own_df['Cash Own%'] = field_ownership(capped_own, 8)

    # Renormalise after the cap so reported 'Own' sums to 900 again.
    basic_own_df['Own'] = basic_own_df['Own'] * (900 / basic_own_df['Own'].sum())

    own_dict = dict(zip(basic_own_df.Player, basic_own_df.Own))
    small_own_dict = dict(zip(basic_own_df.Player, basic_own_df['Small Field Own%']))
    large_own_dict = dict(zip(basic_own_df.Player, basic_own_df['Large Field Own%']))
    cash_own_dict = dict(zip(basic_own_df.Player, basic_own_df['Cash Own%']))
    # Team is keyed on Player + Position; everything else on Player alone.
    team_dict = dict(zip(basic_own_df.name_team, basic_own_df.Team))
    opp_dict = dict(zip(basic_own_df.Player, basic_own_df.Opp))

    # ---- Projection inputs ----------------------------------------------
    hold_file = basic_own_df[['Player', 'Position', 'Salary', 'Median']].copy()
    hold_file['Floor'] = (hold_file['Median'] * floor_var)
    hold_file['Ceiling'] = hold_file['Median'] + (5 * ceiling_var)
    hold_file['STD'] = (hold_file['Median'] / std_var)
    hold_file = hold_file[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD']]
    hold_file = hold_file.reset_index(drop=True)

    floor_values = np_array(hold_file['Floor'])
    ceiling_values = np_array(hold_file['Ceiling'])
    median_values = np_array(hold_file['Median'])
    std_values = np_array(hold_file['STD'])

    # ---- Simulation: one column per simulated slate ----------------------
    sim_array = np_zeros((len(hold_file), total_sims))
    for sim in range(total_sims):
        if distribution_type == 'normal':
            draws = np_random.normal(median_values, std_values)
        elif distribution_type == 'poisson':
            # Median is used as the Poisson rate.
            draws = np_random.poisson(median_values)
        else:
            # Bimodal: a single coin flip per simulation centres every
            # player on either their Floor or their Ceiling.
            if np_random.random() < 0.5:
                draws = np_random.normal(floor_values, std_values)
            else:
                draws = np_random.normal(ceiling_values, std_values)
        sim_array[:, sim] = draws

    # ---- Ranks -----------------------------------------------------------
    # Simulations are looked up by player name, so rows sharing a name all
    # use the last such row's draws (unchanged from the previous behaviour).
    players = hold_file['Player'].tolist()
    last_row = {name: row for row, name in enumerate(players)}
    lookup = np_array([last_row[name] for name in players], dtype=int)
    rank_frame = DataFrame(sim_array[lookup]).rank(ascending=False)

    # Salary in thousands, as a column vector that broadcasts across sims.
    salary_k = np_zeros((len(hold_file), 1))
    salary_k[:, 0] = np_array(hold_file['Salary'])
    salary_k = salary_k / 1000

    # Rates are computed from the rank/simulation matrices only; summary
    # columns no longer leak into the Top-N counts.
    final_outcomes = DataFrame({
        'Player': hold_file['Player'],
        'Top_finish': (rank_frame == 1).sum(axis=1) / total_sims,
        'Top_5_finish': (rank_frame <= 5).sum(axis=1) / total_sims,
        'Top_10_finish': (rank_frame <= 10).sum(axis=1) / total_sims,
        '20+%': (sim_array >= 20).sum(axis=1) / float(total_sims),
        '2x%': ((sim_array - (salary_k * 2)) >= 1).sum(axis=1) / float(total_sims),
        '3x%': ((sim_array - (salary_k * 3)) >= 1).sum(axis=1) / float(total_sims),
        '4x%': ((sim_array - (salary_k * 4)) >= 1).sum(axis=1) / float(total_sims),
        'GPP%': ((sim_array - ((salary_k * 5) + 10)) >= 1).sum(axis=1) / float(total_sims),
    })

    # ---- Assemble output -------------------------------------------------
    final_Proj = pd_merge(hold_file, final_outcomes, on="Player")
    final_Proj = final_Proj[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%', 'GPP%']]
    final_Proj['name_team'] = final_Proj['Player'] + final_Proj['Position']
    final_Proj['Own'] = final_Proj['Player'].map(own_dict)
    final_Proj['Small_Own'] = final_Proj['Player'].map(small_own_dict)
    final_Proj['Large_Own'] = final_Proj['Player'].map(large_own_dict)
    final_Proj['Cash_Own'] = final_Proj['Player'].map(cash_own_dict)
    final_Proj['Team'] = final_Proj['name_team'].map(team_dict)
    final_Proj['Opp'] = final_Proj['Player'].map(opp_dict)
    final_Proj = final_Proj[['Player', 'Position', 'Team', 'Opp', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%', 'GPP%',
                             'Own', 'Small_Own', 'Large_Own', 'Cash_Own']]
    final_Proj['Salary'] = final_Proj['Salary'].astype(int)
    final_Proj = final_Proj.sort_values(by='Median', ascending=False)
    return final_Proj.copy()