James McCool committed on
Commit
795a6d7
·
1 Parent(s): 894a694

Refactor load_contest_file function to streamline data processing

Browse files

- Removed redundant position handling logic and improved the selection of essential columns for clarity.
- Introduced separate dataframes for player attributes, enhancing data organization and accessibility.
- Updated comments to reflect changes and improve code readability, contributing to ongoing efforts to enhance data handling and user experience.

Files changed (1) hide show
  1. global_func/load_contest_file.py +8 -136
global_func/load_contest_file.py CHANGED
@@ -2,11 +2,9 @@ import streamlit as st
2
  import pandas as pd
3
 
4
  def load_contest_file(upload, sport):
5
- pos_values = ['P', 'C', '1B', '2B', '3B', 'SS', 'OF']
6
  if upload is not None:
7
  try:
8
  try:
9
-
10
  if upload.name.endswith('.csv'):
11
  raw_df = pd.read_csv(upload)
12
  elif upload.name.endswith(('.xls', '.xlsx')):
@@ -17,6 +15,7 @@ def load_contest_file(upload, sport):
17
  except:
18
  raw_df = upload
19
 
 
20
  df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS', 'Salary', 'Team']]
21
  df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})
22
 
@@ -25,155 +24,28 @@ def load_contest_file(upload, sport):
25
  df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)')
26
  df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
27
 
28
- # Split the lineup string by replacing position indicators with commas
29
- # We need to ensure we only replace position indicators that are at the start of a player entry
30
- # and not those that might appear within player names
31
- df['Lineup'] = df['Lineup'].str.replace(r'\b(' + '|'.join(pos_values) + r')\b', r'\1,', regex=True)
32
-
33
- # Split into individual columns and remove position indicators
34
- # First, determine the maximum number of players in any lineup
35
- max_players = int(df['Lineup'].str.split(',').str.len().max())
36
-
37
- if max_players <= 0:
38
- st.error('No valid lineups found in the uploaded file')
39
- return None
40
-
41
- # Create columns for each player
42
- for i in range(1, max_players):
43
- df[i] = df['Lineup'].str.split(',').str[i].str.strip()
44
- # Remove position indicators from the end of each entry
45
- df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
46
- # Replace None with -1
47
- df[i] = df[i].fillna('-1')
48
-
49
- if sport == 'MLB':
50
- df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})
51
  try:
52
  df['Own'] = df['Own'].str.replace('%', '').astype(float)
53
  except:
54
  df['Own'] = df['Own'].astype(float)
 
 
55
  ownership_df = df[['Player', 'Own']]
56
  fpts_df = df[['Player', 'FPTS']]
57
  salary_df = df[['Player', 'Salary']]
58
  team_df = df[['Player', 'Team']]
59
  pos_df = df[['Player', 'Pos']]
60
 
61
- # Create position mapping dictionary
62
- pos_dict = dict(zip(pos_df['Player'], pos_df['Pos']))
63
 
64
- # Debug prints
65
- print("\nPosition Dictionary:")
66
- print(pos_dict)
67
-
68
- print("\nSample Lineup String:")
69
- print(df['Lineup'].iloc[0]) # Print first lineup
70
-
71
- # Function to check if player is eligible for position
72
- def is_eligible_for_position(player, target_pos):
73
- if player not in pos_dict:
74
- print(f"Player not found in pos_dict: {player}")
75
- return False
76
- player_positions = pos_dict[player].split('/')
77
- print(f"Checking {player} for {target_pos}. Player positions: {player_positions}")
78
- # Handle special cases
79
- if target_pos.startswith('SP') and 'P' in player_positions:
80
- return True
81
- if target_pos.startswith('OF') and 'OF' in player_positions:
82
- return True
83
- return target_pos in player_positions
84
-
85
- # Process each lineup
86
- for idx, row in df.iterrows():
87
- print(f"\nProcessing lineup {idx}:")
88
- print(f"Original lineup string: {row['Lineup']}")
89
-
90
- # First split by position indicators to preserve player names
91
- lineup_parts = []
92
- current_part = row['Lineup']
93
- for pos in pos_values:
94
- if pos in current_part:
95
- parts = current_part.split(pos)
96
- if len(parts) > 1:
97
- lineup_parts.append(pos) # Add the position
98
- current_part = parts[1] # Keep the rest for further processing
99
-
100
- # Now split the remaining parts by commas, but only if they're not part of a player name
101
- players = []
102
- current_position = None
103
- for part in lineup_parts:
104
- part = part.strip()
105
- if part in pos_values:
106
- current_position = part
107
- continue
108
-
109
- # Split by comma only if it's followed by a position indicator
110
- if ',' in part:
111
- subparts = part.split(',')
112
- for subpart in subparts:
113
- subpart = subpart.strip()
114
- # Check if this subpart ends with a position
115
- has_position = any(subpart.endswith(pos) for pos in pos_values)
116
- if has_position:
117
- # This is a complete player entry
118
- for pos in pos_values:
119
- if subpart.endswith(pos):
120
- player = subpart[:-len(pos)].strip()
121
- players.append((current_position, player))
122
- current_position = pos
123
- break
124
- else:
125
- # This might be part of a player name (like J.P., Crawford)
126
- # Combine with the next part
127
- if players:
128
- last_pos, last_player = players[-1]
129
- players[-1] = (last_pos, last_player + ',' + subpart)
130
- else:
131
- players.append((current_position, subpart))
132
- else:
133
- # No comma, just clean and add
134
- for pos in pos_values:
135
- if part.endswith(pos):
136
- player = part[:-len(pos)].strip()
137
- players.append((current_position, player))
138
- current_position = pos
139
- break
140
-
141
- print(f"Processed players with positions: {players}")
142
-
143
- # Now fill the positions using the processed players
144
- cleaned_players = [player for _, player in players]
145
-
146
- # First pass: fill required positions (excluding OF)
147
- required_positions = ['SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS']
148
- for pos in required_positions:
149
- for position, player in players:
150
- if is_eligible_for_position(player, pos):
151
- print(f"Assigning {player} to {pos}")
152
- df.at[idx, pos] = player
153
- players.remove((position, player))
154
- break
155
- else:
156
- print(f"No player found for {pos}")
157
-
158
- # Second pass: fill OF positions with remaining players
159
- of_positions = ['OF1', 'OF2', 'OF3']
160
- for pos in of_positions:
161
- for position, player in players:
162
- if 'OF' in pos_dict.get(player, '').split('/'):
163
- print(f"Assigning {player} to {pos}")
164
- df.at[idx, pos] = player
165
- players.remove((position, player))
166
- break
167
- else:
168
- print(f"No player found for {pos}, using -1")
169
- df.at[idx, pos] = '-1'
170
-
171
- cleaned_df = df.drop(columns=['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Pos', 'Own', 'FPTS', 'Salary', 'Team'])
172
- cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3']]
173
  entry_list = list(set(df['BaseName']))
174
  entry_list.sort()
175
 
176
  return cleaned_df, ownership_df, fpts_df, salary_df, team_df, pos_df, entry_list
 
177
  except Exception as e:
178
  st.error(f'Error loading file: {str(e)}')
179
  return None
 
2
  import pandas as pd
3
 
4
  def load_contest_file(upload, sport):
 
5
  if upload is not None:
6
  try:
7
  try:
 
8
  if upload.name.endswith('.csv'):
9
  raw_df = pd.read_csv(upload)
10
  elif upload.name.endswith(('.xls', '.xlsx')):
 
15
  except:
16
  raw_df = upload
17
 
18
+ # Select and rename essential columns
19
  df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS', 'Salary', 'Team']]
20
  df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})
21
 
 
24
  df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)')
25
  df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
26
 
27
+ # Convert ownership percentage to float
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  try:
29
  df['Own'] = df['Own'].str.replace('%', '').astype(float)
30
  except:
31
  df['Own'] = df['Own'].astype(float)
32
+
33
+ # Create separate dataframes for different player attributes
34
  ownership_df = df[['Player', 'Own']]
35
  fpts_df = df[['Player', 'FPTS']]
36
  salary_df = df[['Player', 'Salary']]
37
  team_df = df[['Player', 'Team']]
38
  pos_df = df[['Player', 'Pos']]
39
 
40
+ # Create the cleaned dataframe with just the essential columns
41
+ cleaned_df = df[['BaseName', 'EntryCount', 'Lineup']]
42
 
43
+ # Get unique entry names
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  entry_list = list(set(df['BaseName']))
45
  entry_list.sort()
46
 
47
  return cleaned_df, ownership_df, fpts_df, salary_df, team_df, pos_df, entry_list
48
+
49
  except Exception as e:
50
  st.error(f'Error loading file: {str(e)}')
51
  return None