File size: 2,869 Bytes
d04558f
 
 
 
 
 
 
 
 
 
5db8a23
d04558f
 
 
5db8a23
d04558f
5db8a23
d04558f
 
 
 
5db8a23
 
1689df1
 
 
 
 
 
1ba31e0
 
 
 
 
a87b532
8a5c645
ffa4d02
8a5c645
ffa4d02
 
 
 
8a5c645
b9bf803
1ba31e0
fd9eb32
 
5db8a23
 
8a5c645
1689df1
9e80538
d04558f
8a5c645
d04558f
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import streamlit as st
import numpy as np
import pandas as pd
import time
from fuzzywuzzy import process

## import global functions
from global_func.clean_player_name import clean_player_name

def load_file(upload):
    """Parse an uploaded entries file into per-lineup player columns and lookups.

    The file must contain the columns ``EntryId``, ``EntryName``,
    ``TimeRemaining``, ``Points``, ``Lineup``, ``Player``, ``Roster Position``,
    ``%Drafted`` and ``FPTS``. Each ``Lineup`` value is expected to be a single
    string in which every player name is preceded by one of the position
    tokens in ``pos_values`` (e.g. ``"1B Joe Smith 2B Bob Jones"``).

    Args:
        upload: A file-like object exposing a ``name`` attribute (used only to
            pick the CSV or Excel reader), or ``None``.

    Returns:
        ``None`` when ``upload`` is ``None``, the extension is unsupported, no
        lineup could be parsed, or any error occurs while loading (a message
        is shown through ``st.error`` in those failure cases). Otherwise a
        4-tuple ``(cleaned_df, position_dict, ownership_dict, entry_list)``:

        * ``cleaned_df`` -- DataFrame with ``BaseName``, ``EntryCount`` and one
          integer-named column (``1``, ``2``, ...) per lineup slot holding the
          player name.
        * ``position_dict`` -- maps ``Player`` to ``Roster Position``.
        * ``ownership_dict`` -- maps ``Player`` to ``%Drafted``.
        * ``entry_list`` -- sorted list of the distinct ``BaseName`` values.
    """
    pos_values = ['P', 'C', '1B', '2B', '3B', 'SS', 'OF']
    if upload is None:
        return None

    try:
        # Compare the extension case-insensitively so e.g. 'ENTRIES.CSV' is accepted.
        file_name = upload.name.lower()
        if file_name.endswith('.csv'):
            raw_df = pd.read_csv(upload)
        elif file_name.endswith(('.xls', '.xlsx')):
            raw_df = pd.read_excel(upload)
        else:
            st.error('Please upload either a CSV or Excel file')
            return None

        # Take an explicit copy: the columns added below must land on an
        # independent frame, not on a selection of raw_df.
        df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS']].copy()
        df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})

        # Split EntryName ("name (3/20)") into base name and entry count.
        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
        df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)', expand=False)
        df['EntryCount'] = df['EntryCount'].fillna('1/1')  # Default to 1/1 if no entry count

        # Put a comma after every standalone position token so the lineup can
        # be split on commas. NOTE: a player name that itself contains a
        # standalone token (e.g. an initial "C") will also be split here.
        pos_pattern = '|'.join(pos_values)
        df['Lineup'] = df['Lineup'].str.replace(r'\b(' + pos_pattern + r')\b', r'\1,', regex=True)

        # Split once and reuse the result for every slot.
        lineup_parts = df['Lineup'].str.split(',')
        longest = lineup_parts.str.len().max()

        # max() is NaN when there are no rows or every Lineup is missing.
        if pd.isna(longest) or longest <= 0:
            st.error('No valid lineups found in the uploaded file')
            return None
        max_players = int(longest)

        # Segment 0 is only the first position token; segments 1..max_players-1
        # each hold "<player name> <next position token>", and the final
        # segment holds the last player with no trailing token. The range must
        # therefore run up to (and exclude) max_players to keep the last player.
        for i in range(1, max_players):
            slot = lineup_parts.str[i].str.strip()
            # Remove the next slot's position token left at the end of the entry.
            df[i] = slot.str.replace(r'\s+(' + pos_pattern + r')$', '', regex=True)

        position_dict = dict(zip(df['Player'], df['Pos']))
        ownership_dict = dict(zip(df['Player'], df['Own']))
        cleaned_df = df.drop(columns=['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Pos', 'Own', 'FPTS'])
        entry_list = sorted(set(df['BaseName']))

        return cleaned_df, position_dict, ownership_dict, entry_list
    except Exception as e:
        # UI boundary: report any load/parse failure to the user instead of
        # letting it propagate to the caller.
        st.error(f'Error loading file: {str(e)}')
        return None