File size: 7,543 Bytes
9c7e08b
 
91e473e
3894e93
9c7e08b
440bba8
9c7e08b
 
5c9b782
 
 
 
 
 
 
 
 
ab18789
dd89b11
 
bbf6bb9
 
9c7e08b
dd89b11
 
 
 
 
9c7e08b
bbf6bb9
 
9c7e08b
 
 
 
 
3894e93
 
9c7e08b
795a6d7
e1f40de
 
 
 
bbf6bb9
 
795a6d7
dd89b11
 
8d72ffa
dd89b11
bbf6bb9
91e473e
 
8dfe988
 
91e473e
 
7740ecc
889410b
91e473e
 
889410b
91e473e
 
 
 
 
 
 
7740ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91e473e
f8c497a
bbf6bb9
795a6d7
dd89b11
 
 
 
 
 
 
 
 
 
 
 
bbf6bb9
 
42712b2
795a6d7
faa4887
440bba8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3dc6731
440bba8
 
 
1e08ae6
91e473e
bbf6bb9
9c7e08b
178f6d2
4727314
178f6d2
 
996f8cb
795a6d7
9c7e08b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import streamlit as st
import pandas as pd
from rapidfuzz import process, fuzz
from numpy import where as np_where

def _lineup_layout(contest_type, sport):
    """Describe how a contest's ``Lineup`` string is split into roster slots.

    Returns a 3-tuple ``(patterns, split_columns, slot_columns)``:

    * ``patterns`` - regex patterns (roster-slot labels) that are replaced
      with ``','`` inside the lineup string,
    * ``split_columns`` - column names assigned to the pieces produced by
      splitting on ``','``; the first piece is the text before the first
      label and is named ``'Remove'`` so it can be dropped,
    * ``slot_columns`` - the slot columns in the order they appear in the
      final cleaned frame.

    Raises:
        ValueError: if the contest type / sport combination is not handled.
    """
    if contest_type == 'Showdown':
        slots = ['CPT', 'UTIL1', 'UTIL2', 'UTIL3', 'UTIL4', 'UTIL5']
        return [' UTIL ', 'CPT '], ['Remove'] + slots, slots

    if contest_type == 'Classic':
        base_patterns = [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ']
        generic_slots = ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']
        if sport == 'MLB':
            return (
                base_patterns,
                ['Remove', '1B', '2B', '3B', 'C', 'OF1', 'OF2', 'OF3', 'P1', 'P2', 'SS'],
                ['P1', 'P2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3'],
            )
        if sport == 'MMA':
            return base_patterns + [' F ', 'F '], ['Remove'] + generic_slots, generic_slots
        if sport == 'GOLF':
            return base_patterns + [' G ', 'G '], ['Remove'] + generic_slots, generic_slots
        raise ValueError(f'Unsupported sport for Classic contests: {sport!r}')

    raise ValueError(f'Unsupported contest type: {contest_type!r}')


def _align_helper_names(df, df_helper):
    """Rename helper players to the closest contest spelling (score >= 85).

    ``df`` is the contest frame (needs a ``Player`` column) and ``df_helper``
    is the helper frame restricted to ``Player``/``Salary``/``Team``.
    ``df_helper`` is modified in place and returned de-duplicated on
    ``Player`` (first occurrence kept).
    """
    # Missing names cannot be matched, so they are left out of both lists.
    contest_names = df['Player'].dropna().unique()
    helper_names = df_helper['Player'].dropna().unique()

    name_map = {}
    for name in contest_names:
        match = process.extractOne(name, helper_names, score_cutoff=85)
        name_map[name] = match[0] if match else name

    # Contest-side entries take precedence; only helper names that are not
    # already keys get their own (helper -> contest) lookup.
    for name in helper_names:
        if name in name_map:
            continue
        match = process.extractOne(name, contest_names, score_cutoff=85)
        name_map[name] = match[0] if match else name

    df_helper['Player'] = df_helper['Player'].map(name_map)
    return df_helper.drop_duplicates(subset='Player', keep='first')


def load_contest_file(upload, type, helper=None, sport=None):
    """Load a contest export and split it into lineup and player tables.

    Args:
        upload: Either a ``pandas.DataFrame`` that is used as-is, or a
            file-like object with a ``name`` attribute ending in ``.csv``,
            ``.xls`` or ``.xlsx`` (case-insensitive). ``None`` yields ``None``.
        type: Contest format, ``'Classic'`` or ``'Showdown'``. (The name
            shadows the builtin but is kept for caller compatibility.)
        helper: Optional DataFrame with ``Player``, ``Salary`` and ``Team``
            columns. When given, the upload does not need ``Salary``/``Team``
            columns itself.
        sport: For ``'Classic'`` contests one of ``'MLB'``, ``'MMA'`` or
            ``'GOLF'``; not consulted for ``'Showdown'``.

    Returns:
        ``(cleaned_df, ownership_df, fpts_df, entry_list, check_lineups)`` on
        success, where ``cleaned_df`` has one column per roster slot plus
        ``BaseName`` and ``EntryCount``, ``ownership_df``/``fpts_df`` hold
        ``Player`` with ``Own``/``FPTS``, ``entry_list`` is the sorted list of
        unique base entry names and ``check_lineups`` is the
        ``BaseName``/``Lineup`` frame after the slot labels were replaced by
        commas but before splitting. Returns ``None`` when ``upload`` is
        ``None``, the file extension is unsupported, or any error occurs
        (errors are reported through ``st.error``).
    """
    if upload is None:
        return None

    try:
        # An already-parsed frame is accepted directly; anything else is
        # read according to its file extension. Read errors propagate to the
        # handler below so the user sees the real parsing problem.
        if isinstance(upload, pd.DataFrame):
            raw_df = upload
        else:
            filename = upload.name.lower()
            if filename.endswith('.csv'):
                raw_df = pd.read_csv(upload)
            elif filename.endswith(('.xls', '.xlsx')):
                raw_df = pd.read_excel(upload)
            else:
                st.error('Please upload either a CSV or Excel file')
                return None

        print('Made it through initial upload')

        # Salary/Team come from the upload only when no helper supplies them.
        wanted_columns = ['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup',
                          'Player', 'Roster Position', '%Drafted', 'FPTS']
        if helper is None:
            wanted_columns = wanted_columns + ['Salary', 'Team']
        df = raw_df[wanted_columns].rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})

        print('Made it through rename')

        # Split "name (i/n)" entry names into the base name and the i/n part.
        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
        df['EntryCount'] = (
            df['EntryName'].str.extract(r'\((\d+/\d+)\)', expand=False).fillna('1/1')
        )
        if type == 'Showdown':
            # Undo the 1.5x multiplier on captain rows.
            df['FPTS'] = np_where(df['Pos'] == 'CPT', df['FPTS'] / 1.5, df['FPTS'])

        # Ownership may arrive as "12.3%" strings or as plain numbers.
        try:
            df['Own'] = df['Own'].str.replace('%', '', regex=False).astype(float)
        except (AttributeError, TypeError, ValueError):
            df['Own'] = df['Own'].astype(float)

        print('Made it through ownership conversion')

        df_helper = None
        if helper is not None:
            df_helper = helper[['Player', 'Salary', 'Team']].copy()

        print('Made it through helper')

        # Name reconciliation only makes sense when a helper was supplied.
        if df_helper is not None:
            df_helper = _align_helper_names(df, df_helper)

        ownership_df = df[['Player', 'Own']]
        fpts_df = df[['Player', 'FPTS']]
        # NOTE(review): the salary/team/position frames are built exactly as
        # before but are not part of the return tuple - confirm whether
        # callers are meant to receive them.
        attribute_df = df if df_helper is None else df_helper
        salary_df = attribute_df[['Player', 'Salary']]
        team_df = attribute_df[['Player', 'Team']]
        pos_df = df[['Player', 'Pos']]

        print('Made it through dictionaries')

        patterns, split_columns, slot_columns = _lineup_layout(type, sport)

        # Replace every roster-slot label with a comma, then split on commas.
        cleaned_df = df[['BaseName', 'Lineup']].copy()
        cleaned_df['Lineup'] = cleaned_df['Lineup'].replace(patterns, value=',', regex=True)
        print(sport if type == 'Classic' else type)
        check_lineups = cleaned_df.copy()
        cleaned_df[split_columns] = cleaned_df['Lineup'].str.split(',', expand=True)
        cleaned_df = cleaned_df.drop(columns=['Lineup', 'Remove'])

        # EntryCount = number of lineups submitted under the same base name.
        entry_counts = cleaned_df['BaseName'].value_counts()
        cleaned_df['EntryCount'] = cleaned_df['BaseName'].map(entry_counts)
        cleaned_df = cleaned_df[['BaseName', 'EntryCount'] + slot_columns]

        print('Made it through check_lineups')

        entry_list = sorted(set(df['BaseName'].dropna()))

        return cleaned_df, ownership_df, fpts_df, entry_list, check_lineups

    except Exception as e:
        # Top-level boundary for the UI: report the problem and signal failure.
        st.error(f'Error loading file: {str(e)}')
        return None