James McCool
Enhance lineup processing in `load_file.py` to improve data extraction
a87b532
raw
history blame
2.01 kB
import streamlit as st
import numpy as np
import pandas as pd
import time
from fuzzywuzzy import process
## import global functions
from global_func.clean_player_name import clean_player_name
def load_file(upload):
    """Load an uploaded contest file and split each lineup into player columns.

    Args:
        upload: An uploaded file object exposing a ``name`` attribute and
            readable by ``pd.read_csv`` / ``pd.read_excel``, or ``None``.
            The extension (matched case-insensitively) selects the reader.

    Returns:
        ``None`` when ``upload`` is ``None``, the extension is unsupported, or
        loading fails (an error is shown through ``st.error`` in the latter two
        cases). Otherwise a tuple ``(df, position_dict, ownership_dict,
        entry_list)``:

        * ``df`` -- the required columns with ``Roster Position`` renamed to
          ``Pos`` and ``%Drafted`` renamed to ``Own``, plus integer-named
          columns ``0`` .. ``9`` holding the individual player names parsed
          from ``Lineup``. ``Lineup`` itself is rewritten as the
          comma-separated player names.
        * ``position_dict`` -- ``Player`` -> ``Pos``.
        * ``ownership_dict`` -- ``Player`` -> ``Own``.
        * ``entry_list`` -- sorted unique ``EntryName`` values.

    NOTE(review): the column names and position codes look like a DraftKings
    MLB contest-standings export -- confirm against the caller.
    """
    if upload is None:
        return None

    pos_values = ['P', 'C', '1B', '2B', '3B', 'SS', 'OF']
    max_players = 10
    required_columns = ['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup',
                        'Player', 'Roster Position', '%Drafted', 'FPTS']

    try:
        # Match the extension case-insensitively so 'standings.CSV' is accepted.
        file_name = upload.name.lower()
        if file_name.endswith('.csv'):
            raw_df = pd.read_csv(upload)
        elif file_name.endswith(('.xls', '.xlsx')):
            raw_df = pd.read_excel(upload)
        else:
            st.error('Please upload either a CSV or Excel file')
            return None

        # Work on an explicit copy so the column assignments below never write
        # into (or warn about) a view of raw_df.
        df = raw_df[required_columns].copy()
        df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})

        # A position indicator only counts as a separator when it stands alone:
        # preceded by start-of-string or whitespace AND followed by whitespace.
        # This keeps initials such as "C.J." or "J.P." inside player names.
        separator = r'(?:^|\s+)(?:' + '|'.join(pos_values) + r')\s+'

        # Replace every "<pos> " marker with a comma, then drop the comma left
        # behind by the marker that opened the string.
        lineup = (
            df['Lineup']
            .str.strip()
            .str.replace(separator, ',', regex=True)
            .str.lstrip(',')
        )
        df['Lineup'] = lineup

        # One column per lineup slot; rows with fewer players (or no lineup at
        # all) get NaN in the missing slots.
        players = lineup.str.split(',')
        for i in range(max_players):
            df[i] = players.str.get(i)

        # Rows without a player name would otherwise add NaN keys.
        player_rows = df.dropna(subset=['Player'])
        position_dict = dict(zip(player_rows['Player'], player_rows['Pos']))
        ownership_dict = dict(zip(player_rows['Player'], player_rows['Own']))

        # Drop missing entry names first: sorting NaN together with strings
        # raises TypeError and would abort the whole load.
        entry_list = sorted(set(df['EntryName'].dropna()))

        return df, position_dict, ownership_dict, entry_list
    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing the app.
        st.error(f'Error loading file: {str(e)}')
        return None