James McCool
Refine column selection in load_contest_file function for improved data structure
637112e
import streamlit as st | |
import pandas as pd | |
def load_contest_file(upload, sport):
    """Parse a contest export into a lineup table and per-player lookup tables.

    Args:
        upload: An uploaded file object whose ``name`` ends in ``.csv``,
            ``.xls`` or ``.xlsx``, or an already-loaded DataFrame (any object
            without a string ``name`` is used as-is). ``None`` means nothing
            has been uploaded yet.
        sport: Sport code. Only ``'MLB'`` has a lineup layout defined here.

    Returns:
        ``None`` when there is no upload, the sport or file type is not
        supported, or loading fails (a message is shown with ``st.error`` in
        the failure cases). Otherwise a 7-tuple
        ``(cleaned_df, ownership_df, fpts_df, salary_df, team_df, pos_df,
        entry_list)`` where ``cleaned_df`` has one column per roster slot and
        ``entry_list`` is the sorted list of unique base entry names.
    """
    if upload is None:
        return None

    if sport != 'MLB':
        # Previously this surfaced as "name 'pos_list' is not defined".
        st.error(f'Error loading file: unsupported sport {sport!r}')
        return None

    # Position tokens as they appear inside the Lineup string; each one is
    # turned into a comma so the string can be split into player names.
    pos_list = [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ']
    # Column names for the pieces produced by the split, in the order the
    # split yields them. 'Remove' receives the piece before the first token.
    split_cols = ['Remove', '1B', '2B', '3B', 'C', 'OF1', 'OF2', 'OF3', 'P1', 'P2', 'SS']
    source_cols = [
        'EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player',
        'Roster Position', '%Drafted', 'FPTS', 'Salary', 'Team',
    ]
    output_cols = [
        'BaseName', 'EntryCount', 'P1', 'P2', 'C', '1B', '2B', '3B', 'SS',
        'OF1', 'OF2', 'OF3',
    ]

    try:
        file_name = getattr(upload, 'name', None)
        if isinstance(upload, pd.DataFrame) or not isinstance(file_name, str):
            # Not a named file: treat the object itself as the table.
            raw_df = upload
        elif file_name.endswith('.csv'):
            raw_df = pd.read_csv(upload)
        elif file_name.endswith(('.xls', '.xlsx')):
            raw_df = pd.read_excel(upload)
        else:
            st.error('Please upload either a CSV or Excel file')
            return None

        # Select and rename essential columns (rename returns a new frame, so
        # the caller's data is never modified).
        df = raw_df[source_cols].rename(
            columns={'Roster Position': 'Pos', '%Drafted': 'Own'}
        )

        # Split EntryName into base name and entry count, e.g. "name (2/3)".
        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
        entry_count = df['EntryName'].str.extract(r'\((\d+/\d+)\)', expand=False)
        df['EntryCount'] = entry_count.fillna('1/1')  # Default when no "(x/y)" suffix

        # Convert ownership percentage to float.
        try:
            df['Own'] = df['Own'].str.replace('%', '', regex=False).astype(float)
        except AttributeError:
            # The column is not string-typed, so there is no '%' to strip.
            df['Own'] = df['Own'].astype(float)

        # Separate lookup frames for the different player attributes.
        ownership_df = df[['Player', 'Own']]
        fpts_df = df[['Player', 'FPTS']]
        salary_df = df[['Player', 'Salary']]
        team_df = df[['Player', 'Team']]
        pos_df = df[['Player', 'Pos']]

        # Work on an explicit copy so the column assignments below do not
        # write into a slice of df.
        cleaned_df = df[['BaseName', 'EntryCount', 'Lineup']].copy()
        cleaned_df['Lineup'] = cleaned_df['Lineup'].replace(pos_list, value=',', regex=True)
        cleaned_df[split_cols] = cleaned_df['Lineup'].str.split(',', expand=True)
        cleaned_df = cleaned_df[output_cols]

        # Unique entry names, sorted.
        entry_list = sorted(set(df['BaseName']))

        return cleaned_df, ownership_df, fpts_df, salary_df, team_df, pos_df, entry_list
    except Exception as e:
        st.error(f'Error loading file: {str(e)}')
        return None