# DFS_Contest_Analyzer / global_func / load_contest_file.py
# James McCool
# Refactor load_contest_file function to include contest type parameter for improved data handling
# 440bba8
# raw
# history blame
# 7.47 kB
import streamlit as st
import pandas as pd
from rapidfuzz import process, fuzz
# Per-sport settings for Classic contests:
#   (position tokens replaced by ',' in the Lineup string,
#    column names the comma-split pieces are unpacked into,
#    column order of the returned lineup table).
_CLASSIC_LAYOUTS = {
    'MLB': (
        [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF '],
        ['1B', '2B', '3B', 'C', 'OF1', 'OF2', 'OF3', 'P1', 'P2', 'SS'],
        ['P1', 'P2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3'],
    ),
    'MMA': (
        [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ', ' F ', 'F '],
        ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy'],
        ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy'],
    ),
    'GOLF': (
        [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ', ' G ', 'G '],
        ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy'],
        ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy'],
    ),
}

_SHOWDOWN_TOKENS = [' UTIL ', 'CPT ']
# Unique working names: assigning the split result to a key list that repeats
# 'UTIL' makes every UTIL slot overwrite the previous one.
_SHOWDOWN_SLOTS = ['CPT', 'UTIL1', 'UTIL2', 'UTIL3', 'UTIL4', 'UTIL5']
# Labels of the returned Showdown table (kept as callers have always seen them).
_SHOWDOWN_LABELS = ['BaseName', 'EntryCount', 'CPT', 'UTIL', 'UTIL', 'UTIL', 'UTIL', 'UTIL']

_BASE_COLUMNS = ['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup',
                 'Player', 'Roster Position', '%Drafted', 'FPTS']


def _read_upload(upload):
    """Return the raw contest table for *upload*, or None for an unsupported file type."""
    if isinstance(upload, pd.DataFrame):
        return upload
    file_name = getattr(upload, 'name', None)
    if not isinstance(file_name, str):
        # Not a named file object: keep the previous best-effort behaviour of
        # treating the value as an already-loaded table.
        return upload
    lowered = file_name.lower()
    if lowered.endswith('.csv'):
        return pd.read_csv(upload)
    if lowered.endswith(('.xls', '.xlsx')):
        return pd.read_excel(upload)
    st.error('Please upload either a CSV or Excel file')
    return None


def _fuzzy_name_map(names, choices):
    """Map each name to its best match in *choices* (score >= 85), else to itself."""
    mapping = {}
    for name in names:
        match = process.extractOne(name, choices, score_cutoff=85)
        mapping[name] = match[0] if match else name
    return mapping


def _align_helper_players(helper_df, contest_names):
    """Return helper Player/Salary/Team rows with Player run through the fuzzy-match table."""
    aligned = helper_df[['Player', 'Salary', 'Team']].copy()
    helper_names = helper_df.Player.unique()
    contest_to_helper = _fuzzy_name_map(contest_names, helper_names)
    helper_to_contest = _fuzzy_name_map(helper_names, contest_names)
    # Contest-side entries win on key collisions, as before.
    name_map = {**helper_to_contest, **contest_to_helper}
    aligned['Player'] = aligned['Player'].map(name_map)
    return aligned.drop_duplicates(subset='Player', keep='first')


def _expand_lineups(entries, tokens, slot_names, ordered_slots, debug_label):
    """Split the 'Lineup' text into one column per roster slot.

    Returns (lineup_table, check_lineups) where check_lineups is the
    BaseName/Lineup frame after token replacement but before splitting.
    """
    entries = entries.copy()
    entries['Lineup'] = entries['Lineup'].replace(tokens, value=',', regex=True)
    print(debug_label)
    check_lineups = entries.copy()
    # The text before the first delimiter is empty and lands in 'Remove'.
    entries[['Remove'] + slot_names] = entries['Lineup'].str.split(',', expand=True)
    entries = entries.drop(columns=['Lineup', 'Remove'])
    entries['EntryCount'] = entries['BaseName'].map(entries['BaseName'].value_counts())
    return entries[['BaseName', 'EntryCount'] + ordered_slots], check_lineups


def load_contest_file(upload, type, helper=None, sport=None):
    """Load a contest export and split it into lineup, ownership and points tables.

    Args:
        upload: A file-like object with a ``name`` ending in .csv/.xls/.xlsx,
            or an already-loaded DataFrame. ``None`` returns ``None``.
        type: Contest format, ``'Classic'`` or ``'Showdown'``. (The name
            shadows the builtin but is kept for keyword-argument callers.)
        helper: Optional DataFrame with 'Player', 'Salary' and 'Team' columns.
            When given, the upload is not required to carry 'Salary'/'Team'.
        sport: For Classic contests, one of ``'MLB'``, ``'MMA'``, ``'GOLF'``.

    Returns:
        ``(cleaned_df, ownership_df, fpts_df, entry_list, check_lineups)`` on
        success, otherwise ``None``. Any failure is reported with ``st.error``
        rather than raised. For Showdown, ``cleaned_df`` carries five columns
        all labelled 'UTIL', one per UTIL slot in lineup order.
    """
    if upload is None:
        return None

    try:
        raw_df = _read_upload(upload)
        if raw_df is None:
            return None
        print('Made it through initial upload')

        # Select and rename essential columns for the actual upload
        wanted = _BASE_COLUMNS if helper is not None else _BASE_COLUMNS + ['Salary', 'Team']
        df = raw_df[wanted].copy()
        df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})
        print('Made it through rename')

        # Split EntryName into base name and entry count
        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
        df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)')
        df['EntryCount'] = df['EntryCount'].fillna('1/1')  # Default to 1/1 if no entry count

        # Convert ownership percentage to float
        try:
            df['Own'] = df['Own'].str.replace('%', '').astype(float)
        except AttributeError:
            # Column is already numeric, so it has no .str accessor.
            df['Own'] = df['Own'].astype(float)
        print('Made it through ownership conversion')

        if helper is not None:
            # NOTE(review): the aligned helper table is not part of the return
            # value today; it is still built so a malformed helper keeps
            # failing loudly instead of being silently ignored.
            _align_helper_players(helper, df.Player.unique())
        print('Made it through helper')

        ownership_df = df[['Player', 'Own']]
        fpts_df = df[['Player', 'FPTS']]
        print('Made it through dictionaries')

        # Create the cleaned dataframe with just the essential columns
        entries = df[['BaseName', 'Lineup']]
        if type == 'Classic':
            if sport not in _CLASSIC_LAYOUTS:
                raise ValueError(f'Unsupported sport for Classic contests: {sport!r}')
            tokens, slot_names, ordered_slots = _CLASSIC_LAYOUTS[sport]
            cleaned_df, check_lineups = _expand_lineups(
                entries, tokens, slot_names, ordered_slots, sport
            )
        elif type == 'Showdown':
            cleaned_df, check_lineups = _expand_lineups(
                entries, _SHOWDOWN_TOKENS, _SHOWDOWN_SLOTS, _SHOWDOWN_SLOTS, type
            )
            # Restore the labels callers have always received; each 'UTIL'
            # column now holds its own slot instead of a copy of the last one.
            cleaned_df.columns = _SHOWDOWN_LABELS
        else:
            raise ValueError(f'Unsupported contest type: {type!r}')
        st.table(cleaned_df.head(10))
        print('Made it through check_lineups')

        # Get unique entry names
        entry_list = sorted(set(df['BaseName'].dropna()))

        return cleaned_df, ownership_df, fpts_df, entry_list, check_lineups
    except Exception as e:
        st.error(f'Error loading file: {str(e)}')
        return None