import streamlit as st
import numpy as np
import pandas as pd
import time
from fuzzywuzzy import process

## import global functions
from global_func.clean_player_name import clean_player_name


def load_contest_file(upload, sport):
    """Parse an uploaded contest-standings export into per-entry lineups.

    The upload must contain the columns ``EntryId``, ``EntryName``,
    ``TimeRemaining``, ``Points``, ``Lineup``, ``Player``,
    ``Roster Position``, ``%Drafted`` and ``FPTS``.
    NOTE(review): this column set looks like a DraftKings contest export,
    where the entry table and the player/ownership table sit side by side
    and may have different row counts -- confirm against real files.

    Args:
        upload: Uploaded file object exposing ``.name`` and readable by
            ``pd.read_csv`` / ``pd.read_excel``; ``None`` means nothing
            was uploaded.
        sport: Sport code. Only ``'MLB'`` maps the parsed lineup slots to
            named columns; any other value fails at the final column
            selection and is reported as a load error.

    Returns:
        ``None`` when there is no upload, the file type is unsupported, or
        parsing fails (the failure is reported through ``st.error``).
        Otherwise a tuple ``(cleaned_df, ownership_dict, fpts_dict,
        entry_list)`` where:

        * ``cleaned_df`` has one row per entry with the columns
          ``BaseName``, ``EntryCount`` and the ten lineup slots;
        * ``ownership_dict`` maps player name to ``%Drafted`` as a float;
        * ``fpts_dict`` maps player name to ``FPTS``;
        * ``entry_list`` is the sorted list of unique base entry names.
    """
    if upload is None:
        return None

    pos_values = ['P', 'C', '1B', '2B', '3B', 'SS', 'OF']
    # Built once here instead of on every loop iteration below.
    pos_alternation = '|'.join(pos_values)

    try:
        if upload.name.endswith('.csv'):
            raw_df = pd.read_csv(upload)
        elif upload.name.endswith(('.xls', '.xlsx')):
            raw_df = pd.read_excel(upload)
        else:
            st.error('Please upload either a CSV or Excel file')
            return None

        df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup',
                     'Player', 'Roster Position', '%Drafted', 'FPTS']]
        df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})

        # Split EntryName into base name and entry count, e.g.
        # "user (2/5)" -> BaseName "user", EntryCount "2/5".
        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
        df['EntryCount'] = (
            df['EntryName']
            .str.extract(r'\((\d+/\d+)\)', expand=False)
            .fillna('1/1')  # Default to 1/1 if no entry count
        )

        # Mark every position token with a trailing comma so the lineup
        # string can be split into one chunk per roster slot. The token must
        # be a whole whitespace-delimited word: a plain \b boundary would
        # also match the "C" in "C.J. Abrams" or the "P" in "J.P. Crawford"
        # and shift every later slot by one column.
        slot_token = r'(?<!\S)(' + pos_alternation + r')(?!\S)'
        marked_lineup = df['Lineup'].str.replace(slot_token, r'\1,', regex=True)

        # Split once and reuse. Chunk 0 is the first position token on its
        # own; chunks 1..n-1 each hold a player name followed by the next
        # slot's position token (except the last chunk).
        chunks = marked_lineup.str.split(',')
        max_players = int(chunks.str.len().max())

        if max_players <= 0:
            st.error('No valid lineups found in the uploaded file')
            return None

        # Create one column per player, dropping the position token that
        # trails each chunk.
        trailing_token = r'\s+(' + pos_alternation + r')$'
        for i in range(1, max_players):
            df[i] = (
                chunks.str[i]
                .str.strip()
                .str.replace(trailing_token, '', regex=True)
            )

        if sport == 'MLB':
            df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1',
                                    6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})

        # Ownership is normally text such as "12.5%", but the column can
        # already be numeric, in which case the .str accessor would raise.
        own = df['Own']
        if pd.api.types.is_numeric_dtype(own):
            df['Own'] = own.astype(float)
        else:
            df['Own'] = own.str.replace('%', '', regex=False).astype(float)

        # Only rows that actually name a player belong in the lookup dicts;
        # otherwise a NaN key is created whenever the player table is
        # shorter than the entry table.
        player_rows = df.loc[df['Player'].notna(), ['Player', 'Own', 'FPTS']]
        ownership_dict = dict(zip(player_rows['Player'], player_rows['Own']))
        fpts_dict = dict(zip(player_rows['Player'], player_rows['FPTS']))

        # Keep only rows that are real entries. When the player table is
        # longer than the entry table the extra rows have no EntryName, and
        # their NaN BaseName would make the sort below raise TypeError.
        lineup_columns = ['BaseName', 'EntryCount', 'SP1', 'SP2', 'C', '1B',
                          '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3']
        has_entry = df['EntryName'].notna()
        cleaned_df = df.loc[has_entry, lineup_columns]

        entry_list = sorted(set(df.loc[has_entry, 'BaseName']))

        return cleaned_df, ownership_dict, fpts_dict, entry_list

    except Exception as e:
        # UI boundary: surface any parsing problem to the user rather than
        # crashing the app.
        st.error(f'Error loading file: {str(e)}')
        return None