James McCool committed on
Commit
0dedc97
·
1 Parent(s): 3a102e0

Refine lineup string processing in load_contest_file function

Browse files

- Updated regex logic to ensure position indicators are only replaced at the start of player entries and not within player names, enhancing accuracy in lineup formatting.
- Adjusted the removal of position indicators to target only those at the end of each entry, ensuring cleaner data output.
- These changes contribute to ongoing efforts to improve data integrity and user experience within the application.

Files changed (1) hide show
  1. global_func/load_contest_file.py +6 -4
global_func/load_contest_file.py CHANGED
@@ -26,10 +26,12 @@ def load_contest_file(upload, sport):
26
  df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
27
 
28
  # Split the lineup string by replacing position indicators with commas
29
- # Only replace position indicators that are at the start of a player entry
30
- df['Lineup'] = df['Lineup'].str.replace(r'(?<!\w)(' + '|'.join(pos_values) + r')\b', r'\1,', regex=True)
 
31
 
32
  # Split into individual columns and remove position indicators
 
33
  max_players = int(df['Lineup'].str.split(',').str.len().max())
34
 
35
  if max_players <= 0:
@@ -39,8 +41,8 @@ def load_contest_file(upload, sport):
39
  # Create columns for each player
40
  for i in range(1, max_players):
41
  df[i] = df['Lineup'].str.split(',').str[i].str.strip()
42
- # Only remove position indicators that are at the end of the entry
43
- df[i] = df[i].str.replace(r'\b(' + '|'.join(pos_values) + r')\s*$', '', regex=True)
44
 
45
  if sport == 'MLB':
46
  df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})
 
26
  df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
27
 
28
  # Split the lineup string by replacing position indicators with commas
29
+ # We need to ensure we only replace position indicators that are at the start of a player entry
30
+ # and not those that might appear within player names
31
+ df['Lineup'] = df['Lineup'].str.replace(r'\b(' + '|'.join(pos_values) + r')\b', r'\1,', regex=True)
32
 
33
  # Split into individual columns and remove position indicators
34
+ # First, determine the maximum number of players in any lineup
35
  max_players = int(df['Lineup'].str.split(',').str.len().max())
36
 
37
  if max_players <= 0:
 
41
  # Create columns for each player
42
  for i in range(1, max_players):
43
  df[i] = df['Lineup'].str.split(',').str[i].str.strip()
44
+ # Remove position indicators from the end of each entry
45
+ df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
46
 
47
  if sport == 'MLB':
48
  df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})