from typing import List, Optional, Tuple

import streamlit as st
import pandas as pd
from rapidfuzz import process, fuzz
from numpy import where as np_where

# Columns always read from the contest export; 'Salary' and 'Team' are read
# from it as well when no helper table is supplied.
_CONTEST_COLUMNS = ['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup',
                    'Player', 'Roster Position', '%Drafted', 'FPTS']
_HELPER_COLUMNS = ['Player', 'Salary', 'Team']

# Minimum rapidfuzz score for two player names to be treated as the same player.
_NAME_MATCH_CUTOFF = 85

# Position tokens that are turned into commas so a lineup string can be split
# into one column per roster slot. Order matters: patterns are applied in sequence.
_BASE_POSITION_TOKENS = [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ']
_GENERIC_SLOTS = ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']

# sport -> (position tokens, slot names in split order, slot names in output order)
_CLASSIC_LAYOUTS = {
    'MLB': (
        _BASE_POSITION_TOKENS,
        ['1B', '2B', '3B', 'C', 'OF1', 'OF2', 'OF3', 'P1', 'P2', 'SS'],
        ['P1', 'P2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3'],
    ),
    'MMA': (_BASE_POSITION_TOKENS + [' F ', 'F '], _GENERIC_SLOTS, _GENERIC_SLOTS),
    'GOLF': (_BASE_POSITION_TOKENS + [' G ', 'G '], _GENERIC_SLOTS, _GENERIC_SLOTS),
}
_SHOWDOWN_SLOTS = ['CPT', 'UTIL1', 'UTIL2', 'UTIL3', 'UTIL4', 'UTIL5']


def _read_contest_upload(upload):
    """Return the raw contest table for *upload*, or None for an unsupported file type."""
    if isinstance(upload, pd.DataFrame):
        return upload

    file_name = getattr(upload, 'name', None)
    if not isinstance(file_name, str):
        # No usable file name: treat the object as an already-loaded table.
        return upload

    lowered = file_name.lower()
    if lowered.endswith('.csv'):
        return pd.read_csv(upload)
    if lowered.endswith(('.xls', '.xlsx')):
        return pd.read_excel(upload)

    st.error('Please upload either a CSV or Excel file')
    return None


def _reconcile_helper_names(helper_players, contest_names):
    """Rewrite helper 'Player' names to their fuzzy-matched contest spelling and de-duplicate."""
    helper_names = helper_players['Player'].unique()

    name_map = {}
    for name in contest_names:
        match = process.extractOne(name, helper_names, score_cutoff=_NAME_MATCH_CUTOFF)
        name_map[name] = match[0] if match else name

    # Contest-side entries take precedence; only helper names that are not
    # already keys get their own helper -> contest mapping.
    for name in helper_names:
        if name in name_map:
            continue
        match = process.extractOne(name, contest_names, score_cutoff=_NAME_MATCH_CUTOFF)
        name_map[name] = match[0] if match else name

    helper_players['Player'] = helper_players['Player'].map(name_map)
    return helper_players.drop_duplicates(subset='Player', keep='first')


def _split_lineups(entries, contest_type, sport):
    """Split each 'Lineup' string into per-slot columns.

    Returns ``(cleaned_df, check_lineups)`` where ``check_lineups`` is the
    BaseName/Lineup table after position tokens were replaced by commas but
    before the split.
    """
    if contest_type == 'Classic':
        layout = _CLASSIC_LAYOUTS.get(sport)
        if layout is None:
            raise ValueError(
                f'Unsupported sport for Classic contests: {sport!r} '
                f'(expected one of {", ".join(_CLASSIC_LAYOUTS)})'
            )
        tokens, split_slots, output_slots = layout
        breadcrumb = sport
    elif contest_type == 'Showdown':
        flex_token = ' FLEX ' if sport == 'NHL' else ' UTIL '
        tokens = [flex_token, 'CPT ']
        split_slots = output_slots = _SHOWDOWN_SLOTS
        breadcrumb = contest_type
    else:
        raise ValueError(
            f"Unsupported contest type: {contest_type!r} (expected 'Classic' or 'Showdown')"
        )

    # Work on an explicit copy so the column assignments below never write
    # into a slice of the caller's frame.
    entries = entries.copy()
    entries['Lineup'] = entries['Lineup'].replace(list(tokens), value=',', regex=True)
    print(breadcrumb)
    check_lineups = entries.copy()

    # The lineup string starts with a position token, so the first split
    # piece is empty and is discarded via the throwaway 'Remove' column.
    entries[['Remove'] + list(split_slots)] = entries['Lineup'].str.split(',', expand=True)
    entries = entries.drop(columns=['Lineup', 'Remove'])
    entries['EntryCount'] = entries['BaseName'].map(entries['BaseName'].value_counts())
    return entries[['BaseName', 'EntryCount'] + list(output_slots)], check_lineups


def load_contest_file(
    upload,
    type,
    helper=None,
    sport=None,
) -> Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, List[str], pd.DataFrame]]:
    """Load a contest export and break it into lineup, ownership and scoring tables.

    Args:
        upload: A file-like object whose ``name`` ends in .csv/.xls/.xlsx, or an
            already-loaded table. ``None`` short-circuits to ``None``.
        type: Contest format, ``'Classic'`` or ``'Showdown'``. (The name shadows
            the builtin but is kept because callers may pass it by keyword.)
        helper: Optional table with 'Player', 'Salary' and 'Team' columns. When
            given, 'Salary' and 'Team' are not required in the contest export.
        sport: ``'MLB'``, ``'MMA'`` or ``'GOLF'`` for Classic contests; for
            Showdown, ``'NHL'`` selects the FLEX token and anything else UTIL.

    Returns:
        ``(cleaned_df, ownership_df, fpts_df, entry_list, check_lineups)`` on
        success:

        * ``cleaned_df`` - BaseName, EntryCount and one column per roster slot.
        * ``ownership_df`` - Player / Own (as float).
        * ``fpts_df`` - Player / FPTS (Showdown 'CPT' rows divided by 1.5).
        * ``entry_list`` - sorted unique entry base names.
        * ``check_lineups`` - BaseName / comma-delimited Lineup before splitting.

        ``None`` when *upload* is ``None``, the file type is unsupported, or any
        step fails; failures are reported to the user through ``st.error``.
    """
    if upload is None:
        return None

    contest_type = type

    try:
        raw_df = _read_contest_upload(upload)
        if raw_df is None:
            return None
        print('Made it through initial upload')

        wanted = _CONTEST_COLUMNS if helper is not None else _CONTEST_COLUMNS + ['Salary', 'Team']
        df = raw_df[wanted].rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})
        print('Made it through rename')

        # Strip a trailing "(n/m)" entry counter to get the entrant's base name.
        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)

        if contest_type == 'Showdown':
            df['FPTS'] = np_where(df['Pos'] == 'CPT', df['FPTS'] / 1.5, df['FPTS'])

        try:
            df['Own'] = df['Own'].str.replace('%', '', regex=False).astype(float)
        except (AttributeError, TypeError, ValueError):
            # Column is not plain "12.3%"-style text (e.g. already numeric).
            df['Own'] = df['Own'].astype(float)
        print('Made it through ownership conversion')

        helper_players = None if helper is None else helper[_HELPER_COLUMNS].copy()
        print('Made it through helper')

        if helper_players is not None:
            # NOTE(review): nothing below consumes the reconciled helper table
            # (salary/team are not part of the return value). It is still built
            # so that a malformed helper table is rejected here.
            helper_players = _reconcile_helper_names(helper_players, df['Player'].unique())

        ownership_df = df[['Player', 'Own']]
        fpts_df = df[['Player', 'FPTS']]
        print('Made it through dictionaries')

        cleaned_df, check_lineups = _split_lineups(df[['BaseName', 'Lineup']], contest_type, sport)
        print('Made it through check_lineups')

        entry_list = sorted(set(df['BaseName'].dropna()))

        return cleaned_df, ownership_df, fpts_df, entry_list, check_lineups
    except Exception as e:
        # Top-level boundary for the Streamlit UI: surface the problem to the
        # user instead of crashing the app.
        st.error(f'Error loading file: {str(e)}')
        return None