Spaces:

Multichem-PD
/

DFS_Contest_Analyzer

Running

DFS_Contest_Analyzer / global_func /load_contest_file.py

James McCool

Refactor load_contest_file function to include contest type parameter for improved data handling

440bba8 12 days ago

7.47 kB

	import streamlit as st
	import pandas as pd
	from rapidfuzz import process, fuzz

	# Regex patterns that are replaced by ',' in the 'Lineup' string for every
	# Classic sport, and the placeholder slot names shared by MMA and GOLF.
	_POSITION_LABELS = [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ']
	_GENERIC_SLOTS = ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']
	_UTIL_SLOTS = ['UTIL1', 'UTIL2', 'UTIL3', 'UTIL4', 'UTIL5']

	# Each layout is (patterns, split_slots, final_slots, labels):
	#   patterns    - regexes replaced by ',' in the Lineup string
	#   split_slots - unique names for the comma-split pieces, in split order
	#   final_slots - the order in which those slots are returned
	#   labels      - column labels written onto final_slots in the result
	_MLB_FINAL = ['P1', 'P2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3']
	_CLASSIC_LAYOUTS = {
	    'MLB': (
	        _POSITION_LABELS,
	        ['1B', '2B', '3B', 'C', 'OF1', 'OF2', 'OF3', 'P1', 'P2', 'SS'],
	        _MLB_FINAL,
	        _MLB_FINAL,
	    ),
	    'MMA': (
	        _POSITION_LABELS + [' F ', 'F '],
	        _GENERIC_SLOTS,
	        _GENERIC_SLOTS,
	        _GENERIC_SLOTS,
	    ),
	    'GOLF': (
	        _POSITION_LABELS + [' G ', 'G '],
	        _GENERIC_SLOTS,
	        _GENERIC_SLOTS,
	        _GENERIC_SLOTS,
	    ),
	}
	_SHOWDOWN_LAYOUT = (
	    [' UTIL ', 'CPT '],
	    ['CPT'] + _UTIL_SLOTS,
	    ['CPT'] + _UTIL_SLOTS,
	    # The returned frame keeps the historical duplicated 'UTIL' label.
	    ['CPT', 'UTIL', 'UTIL', 'UTIL', 'UTIL', 'UTIL'],
	)


	def _read_upload(upload):
	    """Return *upload* as a DataFrame, or None if its file type is unsupported.

	    Objects without a string ``name`` attribute (e.g. an already-loaded
	    DataFrame) are returned unchanged. Read errors are allowed to propagate
	    so the caller reports the real cause instead of a later, unrelated one.
	    """
	    file_name = getattr(upload, 'name', None)
	    if not isinstance(file_name, str):
	        return upload
	    if file_name.endswith('.csv'):
	        return pd.read_csv(upload)
	    if file_name.endswith(('.xls', '.xlsx')):
	        return pd.read_excel(upload)
	    st.error('Please upload either a CSV or Excel file')
	    return None


	def _align_helper_players(helper_df, contest_names):
	    """Return helper Player/Salary/Team rows with names fuzzy-mapped to the contest."""
	    df_helper = helper_df[['Player', 'Salary', 'Team']].copy()
	    helper_names = helper_df.Player.unique()

	    def best_match(name, candidates):
	        # Fall back to the name itself when nothing scores at least 85.
	        match = process.extractOne(name, candidates, score_cutoff=85)
	        return match[0] if match else name

	    name_map = {name: best_match(name, helper_names) for name in contest_names}
	    # Helper-only spellings are mapped onto their closest contest name.
	    for name in helper_names:
	        if name not in name_map:
	            name_map[name] = best_match(name, contest_names)

	    df_helper['Player'] = df_helper['Player'].map(name_map)
	    return df_helper.drop_duplicates(subset='Player', keep='first')


	def _expand_lineups(lineups, patterns, split_slots, final_slots, labels):
	    """Split the 'Lineup' string into one column per roster slot.

	    Returns ``(expanded, check_lineups)`` where ``check_lineups`` is the
	    BaseName/Lineup frame after the patterns were replaced by commas but
	    before splitting.
	    """
	    lineups = lineups.copy()
	    lineups['Lineup'] = lineups['Lineup'].replace(patterns, value=',', regex=True)
	    check_lineups = lineups.copy()

	    pieces = lineups['Lineup'].str.split(',', expand=True)
	    # The text before the first separator is discarded as 'Remove'.
	    piece_names = ['Remove'] + split_slots
	    if pieces.shape[1] != len(piece_names):
	        raise ValueError(
	            f'Expected {len(split_slots)} players per lineup, '
	            f'found {pieces.shape[1] - 1}'
	        )
	    # Slot names are unique here, so no piece overwrites another.
	    lineups[piece_names] = pieces
	    lineups = lineups.drop(columns=['Lineup', 'Remove'])

	    entry_counts = lineups['BaseName'].value_counts()
	    lineups['EntryCount'] = lineups['BaseName'].map(entry_counts)

	    lineups = lineups[['BaseName', 'EntryCount'] + final_slots]
	    lineups.columns = ['BaseName', 'EntryCount'] + labels
	    return lineups, check_lineups


	def load_contest_file(upload, type, helper=None, sport=None):
	    """Load a contest export and break it into per-entry lineup tables.

	    Args:
	        upload: An uploaded file object whose ``name`` ends in .csv, .xls or
	            .xlsx, or an object that is used directly as the raw DataFrame.
	        type: Contest type, 'Classic' or 'Showdown'. (The name shadows the
	            builtin but is kept because it is part of the call signature.)
	        helper: Optional DataFrame with 'Player', 'Salary' and 'Team'
	            columns. When given, 'Salary' and 'Team' are not required in
	            the upload.
	        sport: For Classic contests, one of 'MLB', 'MMA' or 'GOLF'.

	    Returns:
	        ``(cleaned_df, ownership_df, fpts_df, entry_list, check_lineups)``,
	        or None when ``upload`` is None, the file type is unsupported, or
	        any error occurs (errors are reported through ``st.error``).

	        For Showdown, ``cleaned_df`` has five columns labelled 'UTIL', each
	        holding a different player.
	    """
	    if upload is None:
	        return None

	    try:
	        raw_df = _read_upload(upload)
	        if raw_df is None:
	            return None

	        print('Made it through initial upload')

	        # Salary and Team come from the helper when one is supplied.
	        wanted = ['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup',
	                  'Player', 'Roster Position', '%Drafted', 'FPTS']
	        if helper is None:
	            wanted = wanted + ['Salary', 'Team']
	        df = raw_df[wanted].rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})

	        print('Made it through rename')

	        # Assumes entry names carry an optional "(n/m)" suffix; the parentheses
	        # must be escaped so they are matched literally.
	        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
	        df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)', expand=False)
	        df['EntryCount'] = df['EntryCount'].fillna('1/1')  # Default to 1/1 if no entry count

	        # Convert ownership percentage to float
	        try:
	            df['Own'] = df['Own'].str.replace('%', '', regex=False).astype(float)
	        except AttributeError:
	            # No .str accessor: the column is not string-typed.
	            df['Own'] = df['Own'].astype(float)

	        print('Made it through ownership conversion')

	        if helper is not None:
	            df_helper = _align_helper_players(helper, df.Player.unique())

	        print('Made it through helper')

	        # Create separate dataframes for different player attributes.
	        # NOTE(review): salary_df, team_df and pos_df are built but are not
	        # part of the return value; confirm whether callers should get them.
	        ownership_df = df[['Player', 'Own']]
	        fpts_df = df[['Player', 'FPTS']]
	        pos_df = df[['Player', 'Pos']]
	        salary_source = df_helper if helper is not None else df
	        salary_df = salary_source[['Player', 'Salary']]
	        team_df = salary_source[['Player', 'Team']]

	        print('Made it through dictionaries')

	        # Create the cleaned dataframe with just the essential columns
	        if type == 'Classic':
	            print(sport)
	            if sport not in _CLASSIC_LAYOUTS:
	                raise ValueError(f'Unsupported sport for Classic contests: {sport!r}')
	            layout = _CLASSIC_LAYOUTS[sport]
	        elif type == 'Showdown':
	            print(type)
	            layout = _SHOWDOWN_LAYOUT
	        else:
	            raise ValueError(f'Unsupported contest type: {type!r}')

	        cleaned_df, check_lineups = _expand_lineups(df[['BaseName', 'Lineup']], *layout)
	        st.table(cleaned_df.head(10))

	        print('Made it through check_lineups')

	        # Get unique entry names
	        entry_list = sorted(set(df['BaseName'].dropna()))

	        return cleaned_df, ownership_df, fpts_df, entry_list, check_lineups

	    except Exception as e:
	        # Top-level boundary for the app: report the failure and return None.
	        st.error(f'Error loading file: {str(e)}')
	        return None