James McCool committed on
Commit
3a102e0
·
1 Parent(s): 4a0abde

Enhance lineup string processing in load_contest_file function

Browse files

- Updated regex logic to append a comma only after position indicators that are not preceded by a word character (i.e., those at the start of a player entry), improving accuracy in lineup formatting.
- Adjusted the removal of position indicators to target only those at the end of each entry, ensuring cleaner data output.
- These changes contribute to ongoing efforts to enhance data integrity and improve user experience within the application.

Files changed (1) hide show
  1. global_func/load_contest_file.py +4 -4
global_func/load_contest_file.py CHANGED
@@ -26,10 +26,10 @@ def load_contest_file(upload, sport):
26
  df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
27
 
28
  # Split the lineup string by replacing position indicators with commas
29
- df['Lineup'] = df['Lineup'].str.replace(r'\b(' + '|'.join(pos_values) + r')\b', r'\1,', regex=True)
 
30
 
31
  # Split into individual columns and remove position indicators
32
- # First, determine the maximum number of players in any lineup
33
  max_players = int(df['Lineup'].str.split(',').str.len().max())
34
 
35
  if max_players <= 0:
@@ -39,8 +39,8 @@ def load_contest_file(upload, sport):
39
  # Create columns for each player
40
  for i in range(1, max_players):
41
  df[i] = df['Lineup'].str.split(',').str[i].str.strip()
42
- # Remove position indicators from the end of each entry
43
- df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
44
 
45
  if sport == 'MLB':
46
  df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})
 
26
  df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
27
 
28
  # Split the lineup string by replacing position indicators with commas
29
+ # Only replace position indicators that are at the start of a player entry
30
+ df['Lineup'] = df['Lineup'].str.replace(r'(?<!\w)(' + '|'.join(pos_values) + r')\b', r'\1,', regex=True)
31
 
32
  # Split into individual columns and remove position indicators
 
33
  max_players = int(df['Lineup'].str.split(',').str.len().max())
34
 
35
  if max_players <= 0:
 
39
  # Create columns for each player
40
  for i in range(1, max_players):
41
  df[i] = df['Lineup'].str.split(',').str[i].str.strip()
42
+ # Only remove position indicators that are at the end of the entry
43
+ df[i] = df[i].str.replace(r'\b(' + '|'.join(pos_values) + r')\s*$', '', regex=True)
44
 
45
  if sport == 'MLB':
46
  df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})