DFS_Contest_Analyzer / global_func /load_contest_file.py
James McCool
Refactor lineup replacement logic in load_contest_file.py
c2b7029
raw
history blame
7.85 kB
import streamlit as st
import pandas as pd
from rapidfuzz import process, fuzz
from numpy import where as np_where
def load_contest_file(upload, type, helper = None, sport = None):
if upload is not None:
try:
try:
if upload.name.endswith('.csv'):
raw_df = pd.read_csv(upload)
elif upload.name.endswith(('.xls', '.xlsx')):
raw_df = pd.read_excel(upload)
else:
st.error('Please upload either a CSV or Excel file')
return None
except:
raw_df = upload
if helper is not None:
helper_df = helper
print('Made it through initial upload')
# Select and rename essential columns for the actual upload
if helper is None:
df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS', 'Salary', 'Team']]
else:
df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS']]
df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})
print('Made it through rename')
# Split EntryName into base name and entry count
df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)')
df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
if type == 'Showdown':
df['FPTS'] = np_where(df['Pos'] == 'CPT', df['FPTS'] / 1.5, df['FPTS'])
# Convert ownership percentage to float
try:
df['Own'] = df['Own'].str.replace('%', '').astype(float)
except:
df['Own'] = df['Own'].astype(float)
print('Made it through ownership conversion')
# Select and rename essential columns for the actual upload
if helper is not None:
df_helper = helper_df[['Player', 'Salary', 'Team']]
print('Made it through helper')
contest_names = df.Player.unique()
if helper is not None:
helper_names = helper_df.Player.unique()
contest_match_dict = {}
helper_match_dict = {}
for names in contest_names:
match = process.extractOne(
names,
helper_names,
score_cutoff = 85
)
if match:
contest_match_dict[names] = match[0]
else:
contest_match_dict[names] = names
for names in helper_names:
match = process.extractOne(
names,
contest_names,
score_cutoff = 85
)
if match:
helper_match_dict[names] = match[0]
else:
helper_match_dict[names] = names
for key, value in helper_match_dict.items():
if key not in contest_match_dict:
contest_match_dict[key] = value
df_helper['Player'] = df_helper['Player'].map(contest_match_dict)
# df['Player'] = df['Player'].map(contest_match_dict)
df_helper = df_helper.drop_duplicates(subset='Player', keep='first')
# df = df.drop_duplicates(subset='Player', keep='first')
# Create separate dataframes for different player attributes
if helper is not None:
ownership_df = df[['Player', 'Own']]
fpts_df = df[['Player', 'FPTS']]
salary_df = df_helper[['Player', 'Salary']]
team_df = df_helper[['Player', 'Team']]
pos_df = df[['Player', 'Pos']]
else:
ownership_df = df[['Player', 'Own']]
fpts_df = df[['Player', 'FPTS']]
salary_df = df[['Player', 'Salary']]
team_df = df[['Player', 'Team']]
pos_df = df[['Player', 'Pos']]
print('Made it through dictionaries')
# Create the cleaned dataframe with just the essential columns
cleaned_df = df[['BaseName', 'Lineup']]
if type == 'Classic':
if sport == 'MLB':
cleaned_df['Lineup'] = cleaned_df['Lineup'].replace([' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF '], value=',', regex=True)
elif sport == 'MMA':
cleaned_df['Lineup'] = cleaned_df['Lineup'].replace([' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ', ' F ', 'F '], value=',', regex=True)
elif sport == 'GOLF':
cleaned_df['Lineup'] = cleaned_df['Lineup'].replace([' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ', ' G ', 'G '], value=',', regex=True)
print(sport)
check_lineups = cleaned_df.copy()
if sport == 'MLB':
cleaned_df[['Remove', '1B', '2B', '3B', 'C', 'OF1', 'OF2', 'OF3', 'P1', 'P2', 'SS']] = cleaned_df['Lineup'].str.split(',', expand=True)
elif sport == 'MMA':
cleaned_df[['Remove', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']] = cleaned_df['Lineup'].str.split(',', expand=True)
elif sport == 'GOLF':
cleaned_df[['Remove', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']] = cleaned_df['Lineup'].str.split(',', expand=True)
cleaned_df = cleaned_df.drop(columns=['Lineup', 'Remove'])
entry_counts = cleaned_df['BaseName'].value_counts()
cleaned_df['EntryCount'] = cleaned_df['BaseName'].map(entry_counts)
if sport == 'MLB':
cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'P1', 'P2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3']]
elif sport == 'MMA':
cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']]
elif sport == 'GOLF':
cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']]
elif type == 'Showdown':
if sport == 'NHL':
cleaned_df['Lineup'] = cleaned_df['Lineup'].replace([' FLEX ', 'CPT '], value=',', regex=True)
else:
cleaned_df['Lineup'] = cleaned_df['Lineup'].replace([' UTIL ', 'CPT '], value=',', regex=True)
print(type)
check_lineups = cleaned_df.copy()
cleaned_df[['Remove', 'CPT', 'UTIL1', 'UTIL2', 'UTIL3', 'UTIL4', 'UTIL5']] = cleaned_df['Lineup'].str.split(',', expand=True)
cleaned_df = cleaned_df.drop(columns=['Lineup', 'Remove'])
entry_counts = cleaned_df['BaseName'].value_counts()
cleaned_df['EntryCount'] = cleaned_df['BaseName'].map(entry_counts)
cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'CPT', 'UTIL1', 'UTIL2', 'UTIL3', 'UTIL4', 'UTIL5']]
print('Made it through check_lineups')
# Get unique entry names
entry_list = list(set(df['BaseName'].dropna()))
entry_list.sort()
return cleaned_df, ownership_df, fpts_df, entry_list, check_lineups
except Exception as e:
st.error(f'Error loading file: {str(e)}')
return None
return None