Spaces:

Multichem-PD
/

Custom_ROO_Tool

Running

Custom_ROO_Tool / function_hold /MLB_functions.py

James McCool

Add MLB support to ROO build functions and Streamlit display, including percentage formatting and data upload instructions

95b08b3 3 months ago

raw

history blame

22.7 kB

	from numpy import nan as np_nan
	from numpy import where as np_where
	from numpy import random as np_random
	from numpy import zeros as np_zeros
	from numpy import array as np_array
	from pandas import concat as pd_concat
	from pandas import merge as pd_merge
	from pandas import DataFrame

	def _rescale_to_total(values, target):
	    """Scale ``values`` so that they sum to ``target``.

	    The values are returned unchanged when their sum is 0, because dividing
	    by that sum would only turn every entry into inf/NaN.
	    """
	    total = values.sum()
	    if total == 0:
	        return values
	    return values * (target / total)


	def _field_ownership(own, multiplier, cap=85):
	    """Derive a contest-specific ownership array from the ``own`` Series.

	    Rows at or above the mean ownership become
	    ``own * (multiplier * (own - mean) / 100) + mean``; rows below the mean
	    keep their value. Every result is then capped at ``cap``.
	    """
	    mean_own = own.mean()
	    boosted = own * (multiplier * (own - mean_own) / 100) + mean_own
	    adjusted = np_where(own - mean_own >= 0, boosted, own)
	    return np_where(adjusted > cap, cap, adjusted)


	def _simulate_outcomes(distribution_type, floor, median, ceiling, std, total_sims):
	    """Return a ``(players, total_sims)`` matrix of simulated scores.

	    One vector of scores (one entry per player) is drawn per simulation:
	    * ``'normal'``  - normal around ``median`` with scale ``std``;
	    * ``'poisson'`` - Poisson with ``median`` as lambda;
	    * ``'bimodal'`` - a single coin flip per simulation selects ``floor`` or
	      ``ceiling`` as the centre of a normal draw with scale ``std``.
	    """
	    outcomes = np_zeros((len(median), total_sims))
	    for sim in range(total_sims):
	        if distribution_type == 'normal':
	            draw = np_random.normal(median, std)
	        elif distribution_type == 'poisson':
	            draw = np_random.poisson(median)
	        else:
	            # 'bimodal' - the caller has already validated distribution_type.
	            centre = floor if np_random.random() < 0.5 else ceiling
	            draw = np_random.normal(centre, std)
	        outcomes[:, sim] = draw
	    return outcomes


	def _mlb_roo_build(projections_file, floor_var, ceiling_var, std_var, distribution_type,
	                   pitcher_own_total, hitter_own_total, combined_own_total,
	                   total_sims, salary_as_int):
	    """Shared implementation behind ``DK_MLB_ROO_Build`` and ``FD_MLB_ROO_Build``.

	    ``pitcher_own_total`` / ``hitter_own_total`` are the sums the 'Own' column
	    is rescaled to within each position group, ``combined_own_total`` is the
	    sum it is rescaled to across all players, and ``salary_as_int`` casts the
	    output 'Salary' column to int.
	    """
	    # Validate first: previously a bare ``except`` swallowed this error and the
	    # function failed later with an unrelated KeyError.
	    if distribution_type not in ('normal', 'poisson', 'bimodal'):
	        raise ValueError("Invalid distribution type. Must be 'normal', 'poisson', or 'bimodal'")
	    if total_sims < 1:
	        raise ValueError("total_sims must be a positive integer")

	    # Positions containing 'P' are treated as pitchers. Work on explicit copies
	    # so neither the caller's frame nor a pandas slice view is written to.
	    is_pitcher = projections_file['Position'].str.contains('P')
	    sp_frame = projections_file[is_pitcher].copy()
	    hit_frame = projections_file[~is_pitcher].copy()
	    sp_frame['Own'] = _rescale_to_total(sp_frame['Own'], pitcher_own_total)
	    hit_frame['Own'] = _rescale_to_total(hit_frame['Own'], hitter_own_total)

	    # Pitchers first, then hitters; every later step is aligned by row position.
	    roo = pd_concat([sp_frame, hit_frame]).reset_index(drop=True)

	    # Normalize combined ownership, cap single players at 90, derive the field
	    # ownerships from the capped values, then renormalize the base ownership.
	    roo['Own'] = _rescale_to_total(roo['Own'], combined_own_total)
	    roo['Own'] = np_where(roo['Own'] > 90, 90, roo['Own'])
	    # NOTE(review): as in the previous implementation, the field ownerships
	    # are reported capped at 85 but NOT renormalized to combined_own_total.
	    # Confirm whether that is intended.
	    small_own = _field_ownership(roo['Own'], 6)
	    large_own = _field_ownership(roo['Own'], 2.5)
	    cash_own = _field_ownership(roo['Own'], 8)
	    roo['Own'] = _rescale_to_total(roo['Own'], combined_own_total)

	    floor = roo['Median'] * floor_var
	    ceiling = roo['Median'] + (5 * ceiling_var)
	    std = roo['Median'] / std_var

	    outcomes = _simulate_outcomes(
	        distribution_type,
	        np_array(floor),
	        np_array(roo['Median']),
	        np_array(ceiling),
	        np_array(std),
	        total_sims,
	    )

	    # Rank players within each simulation (1 = best, ties share the average rank).
	    ranks = DataFrame(outcomes).rank(ascending=False).to_numpy()
	    # Salary in thousands, as a column vector that broadcasts across simulations.
	    salary_k = np_array(roo['Salary'], dtype=float).reshape(-1, 1) / 1000

	    def share(mask):
	        # Fraction of simulations (columns) in which the condition holds per player.
	        return mask.sum(axis=1) / float(total_sims)

	    # The finish shares are computed on the rank matrix only. Previously the
	    # Average_Rank / Top_* summary columns were part of the masked frame and
	    # were counted as if they were simulations, inflating the results.
	    final_Proj = DataFrame({
	        'Player': roo['Player'],
	        'Position': roo['Position'],
	        'Team': roo['Team'],
	        'Opp': roo['Opp'],
	        'Salary': roo['Salary'],
	        'Floor': floor,
	        'Median': roo['Median'],
	        'Ceiling': ceiling,
	        'Top_finish': share(ranks == 1),
	        'Top_5_finish': share(ranks <= 5),
	        'Top_10_finish': share(ranks <= 10),
	        '20+%': share(outcomes >= 20),
	        '2x%': share(outcomes - (salary_k * 2) >= 1),
	        '3x%': share(outcomes - (salary_k * 3) >= 1),
	        '4x%': share(outcomes - (salary_k * 4) >= 1),
	        'GPP%': share(outcomes - ((salary_k * 5) + 10) >= 1),
	        'Own': roo['Own'],
	        'Small_Own': small_own,
	        'Large_Own': large_own,
	        'Cash_Own': cash_own,
	    })

	    if salary_as_int:
	        final_Proj['Salary'] = final_Proj['Salary'].astype(int)

	    return final_Proj.sort_values(by='Median', ascending=False)


	def DK_MLB_ROO_Build(projections_file, floor_var, ceiling_var, std_var, distribution_type, total_sims=1000):
	    """Build the range-of-outcomes table from a projections frame (DK variant).

	    Args:
	        projections_file: DataFrame with the columns 'Player', 'Position',
	            'Team', 'Opp', 'Salary', 'Median' and 'Own'. It is not modified.
	        floor_var: multiplier applied to 'Median' to obtain 'Floor'.
	        ceiling_var: 'Ceiling' is ``Median + 5 * ceiling_var``.
	        std_var: divisor applied to 'Median' to obtain the simulation scale.
	        distribution_type: 'normal', 'poisson' or 'bimodal'.
	        total_sims: number of simulations to run (default 1000).

	    Returns:
	        A new DataFrame sorted by 'Median' (descending) with the columns
	        Player, Position, Team, Opp, Salary, Floor, Median, Ceiling,
	        Top_finish, Top_5_finish, Top_10_finish, 20+%, 2x%, 3x%, 4x%, GPP%,
	        Own, Small_Own, Large_Own, Cash_Own. The simulation-based columns are
	        fractions of ``total_sims``.

	    Ownership is rescaled to sum to 200 across pitchers and 800 across
	    hitters, then to 1000 across all players.

	    Raises:
	        ValueError: if ``distribution_type`` is not supported or
	            ``total_sims`` is smaller than 1.
	    """
	    return _mlb_roo_build(
	        projections_file, floor_var, ceiling_var, std_var, distribution_type,
	        pitcher_own_total=200,
	        hitter_own_total=800,
	        combined_own_total=1000,
	        total_sims=total_sims,
	        salary_as_int=False,
	    )


	def FD_MLB_ROO_Build(projections_file, floor_var, ceiling_var, std_var, distribution_type, total_sims=1000):
	    """Build the range-of-outcomes table from a projections frame (FD variant).

	    Takes the same arguments and returns the same columns as
	    ``DK_MLB_ROO_Build``, with these differences: ownership is rescaled to
	    sum to 100 across pitchers and 800 across hitters, then to 900 across all
	    players, and the returned 'Salary' column is cast to int.

	    Raises:
	        ValueError: if ``distribution_type`` is not supported or
	            ``total_sims`` is smaller than 1.
	    """
	    return _mlb_roo_build(
	        projections_file, floor_var, ceiling_var, std_var, distribution_type,
	        pitcher_own_total=100,
	        hitter_own_total=800,
	        combined_own_total=900,
	        total_sims=total_sims,
	        salary_as_int=True,
	    )