Spaces:

Multichem-PD
/

DFS_Contest_Analyzer

Running

File size: 2,869 Bytes

d04558f
 
 
 
 
 
 
 
 
 
5db8a23
d04558f
 
 
5db8a23
d04558f
5db8a23
d04558f
 
 
 
5db8a23
 
1689df1
 
 
 
 
 
1ba31e0
 
 
 
 
a87b532
8a5c645
ffa4d02
8a5c645
ffa4d02
 
 
 
8a5c645
b9bf803
1ba31e0
fd9eb32
 
5db8a23
 
8a5c645
1689df1
9e80538
d04558f
8a5c645
d04558f

import re
import time

import numpy as np
import pandas as pd
import streamlit as st
from fuzzywuzzy import process

## import global functions
from global_func.clean_player_name import clean_player_name

def load_file(upload):
    """Load a contest export and split each lineup string into player columns.

    The ``Lineup`` column holds one string per entry in which every player is
    preceded by a roster-position token (one of ``pos_values``), e.g.
    ``"P John Doe C J.P. Smith OF Jane Roe"``. Each player is written to its
    own integer-named column (``1`` .. ``N``).

    Args:
        upload: Uploaded file object exposing a ``name`` attribute and readable
            by ``pd.read_csv`` / ``pd.read_excel`` (presumably a Streamlit
            ``UploadedFile`` -- confirm against the caller), or ``None``.

    Returns:
        ``None`` when nothing was uploaded, the extension is unsupported, no
        lineup could be split, or parsing raised (in the last three cases an
        error is shown with ``st.error``). Otherwise a 4-tuple:

        * ``cleaned_df`` -- ``BaseName``, ``EntryCount`` and one column per
          roster slot (``1`` .. ``N``) holding the player name.
        * ``position_dict`` -- maps ``Player`` to ``Roster Position``.
        * ``ownership_dict`` -- maps ``Player`` to ``%Drafted``.
        * ``entry_list`` -- sorted unique base entry names.
    """
    pos_values = ['P', 'C', '1B', '2B', '3B', 'SS', 'OF']
    if upload is None:
        return None

    try:
        # Compare the extension case-insensitively so 'export.CSV' is accepted.
        file_name = upload.name.lower()
        if file_name.endswith('.csv'):
            raw_df = pd.read_csv(upload)
        elif file_name.endswith(('.xls', '.xlsx')):
            raw_df = pd.read_excel(upload)
        else:
            st.error('Please upload either a CSV or Excel file')
            return None

        df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS']]
        df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})

        # Split EntryName into base name and entry count, e.g. "name (2/5)".
        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
        df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)', expand=False)
        df['EntryCount'] = df['EntryCount'].fillna('1/1')  # Default to 1/1 if no entry count

        pos_alternation = '|'.join(pos_values)
        # A position token only counts as a slot marker when it is a
        # stand-alone word: at the start of the string or after whitespace,
        # and followed by whitespace or the end of the string. A plain ``\b``
        # match would also hit initials inside names such as "J.P." or "C.J."
        # and split those players in two. The pattern is compiled so pandas
        # always evaluates it with Python's ``re`` (lookarounds required).
        slot_marker = re.compile(r'(?:^|(?<=\s))(' + pos_alternation + r')(?=\s|$)')
        # After splitting, every piece but the last still ends with the
        # position token that introduces the NEXT player.
        trailing_marker = r'\s+(' + pos_alternation + r')$'

        # Put a comma after each slot marker, then split on commas:
        # "P A C B" -> "P, A C, B" -> ["P", " A C", " B"].
        df['Lineup'] = df['Lineup'].str.replace(slot_marker, r'\1,', regex=True)
        lineup_pieces = df['Lineup'].str.split(',')

        # N players produce N + 1 pieces (piece 0 is the first position token),
        # so fewer than 2 pieces means no player could be extracted anywhere.
        max_pieces = lineup_pieces.str.len().max()
        if pd.isna(max_pieces) or max_pieces < 2:
            st.error('No valid lineups found in the uploaded file')
            return None
        max_pieces = int(max_pieces)

        # Pieces 1 .. max_pieces - 1 are the players; include the last one.
        for slot in range(1, max_pieces):
            names = lineup_pieces.str[slot].str.strip()
            df[slot] = names.str.replace(trailing_marker, '', regex=True)

        position_dict = dict(zip(df['Player'], df['Pos']))
        ownership_dict = dict(zip(df['Player'], df['Own']))
        cleaned_df = df.drop(columns=['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Pos', 'Own', 'FPTS'])
        entry_list = sorted(set(df['BaseName']))

        return cleaned_df, position_dict, ownership_dict, entry_list
    except Exception as e:
        # UI boundary: report any parsing problem to the user instead of
        # crashing the Streamlit app.
        st.error(f'Error loading file: {str(e)}')
        return None