James McCool
Adjust player column extraction logic in `load_file.py` to improve data handling
b9bf803
import streamlit as st | |
import numpy as np | |
import pandas as pd | |
import time | |
from fuzzywuzzy import process | |
## import global functions | |
from global_func.clean_player_name import clean_player_name | |
def load_file(upload):
    """Parse an uploaded contest export into a lineup table and player lookups.

    The upload is read as CSV or Excel based on the extension of
    ``upload.name`` (matched case-insensitively). The file must contain the
    columns ``EntryId``, ``EntryName``, ``TimeRemaining``, ``Points``,
    ``Lineup``, ``Player``, ``Roster Position``, ``%Drafted`` and ``FPTS``.

    Args:
        upload: File-like object exposing a ``name`` attribute, or ``None``.

    Returns:
        ``None`` when ``upload`` is ``None``, the extension is unsupported, or
        any error occurs while parsing (an error is reported via ``st.error``
        in the latter two cases). Otherwise a 4-tuple:

        * ``cleaned_df`` -- DataFrame with ``BaseName``, ``EntryCount`` and one
          integer-labelled column per extracted lineup slot.
        * ``position_dict`` -- maps each ``Player`` value to its ``Pos``.
        * ``ownership_dict`` -- maps each ``Player`` value to its ``Own``.
        * ``entry_list`` -- sorted list of unique ``BaseName`` values.
    """
    if upload is None:
        return None

    pos_values = ['P', 'C', '1B', '2B', '3B', 'SS', 'OF']
    pos_pattern = '|'.join(pos_values)

    try:
        # Compare the extension in lower case so 'EXPORT.CSV' is accepted too.
        file_name = upload.name.lower()
        if file_name.endswith('.csv'):
            raw_df = pd.read_csv(upload)
        elif file_name.endswith(('.xls', '.xlsx')):
            raw_df = pd.read_excel(upload)
        else:
            st.error('Please upload either a CSV or Excel file')
            return None

        # Work on an explicit copy: new columns are added below and must not
        # be written into a selection of raw_df.
        df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup',
                     'Player', 'Roster Position', '%Drafted', 'FPTS']].copy()
        df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})

        # Split EntryName into the base name and the "(x/y)" entry counter.
        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
        # expand=False returns a Series for the single capture group.
        df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)', expand=False)
        df['EntryCount'] = df['EntryCount'].fillna('1/1')  # Default when no counter is present

        # Put a comma after every standalone position token so the lineup
        # string can be split into per-player segments. Word boundaries keep
        # tokens embedded in longer words from matching.
        df['Lineup'] = df['Lineup'].str.replace(r'\b(' + pos_pattern + r')\b', r'\1,', regex=True)

        # Split once and reuse; the Lineup column is not modified in the loop.
        segments = df['Lineup'].str.split(',')
        max_players = int(segments.str.len().max())
        if max_players <= 0:
            st.error('No valid lineups found in the uploaded file')
            return None

        # Segment 0 is the leading position label, so player text starts at 1.
        # NOTE(review): this bound stops at index max_players - 2, so the final
        # segment of the longest lineup is not extracted -- confirm intended.
        for i in range(1, max_players - 1):
            df[i] = segments.str[i].str.strip()
            # Each segment ends with the NEXT player's position token; drop it.
            df[i] = df[i].str.replace(r'\s+(' + pos_pattern + r')$', '', regex=True)

        # For repeated Player values the last row wins.
        position_dict = dict(zip(df['Player'], df['Pos']))
        ownership_dict = dict(zip(df['Player'], df['Own']))

        cleaned_df = df.drop(columns=['EntryId', 'EntryName', 'TimeRemaining', 'Points',
                                      'Lineup', 'Player', 'Pos', 'Own', 'FPTS'])
        entry_list = sorted(set(df['BaseName']))

        return cleaned_df, position_dict, ownership_dict, entry_list
    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing the app.
        st.error(f'Error loading file: {str(e)}')
        return None