James McCool
committed on
Commit
·
0dedc97
1
Parent(s):
3a102e0
Refine lineup string processing in load_contest_file function
Browse files
- Updated regex logic to ensure position indicators are only replaced at the start of player entries and not within player names, enhancing accuracy in lineup formatting.
- Adjusted the removal of position indicators to target only those at the end of each entry, ensuring cleaner data output.
- These changes contribute to ongoing efforts to improve data integrity and user experience within the application.
global_func/load_contest_file.py
CHANGED
@@ -26,10 +26,12 @@ def load_contest_file(upload, sport):
|
|
26 |
df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
|
27 |
|
28 |
# Split the lineup string by replacing position indicators with commas
|
29 |
-
#
|
30 |
-
|
|
|
31 |
|
32 |
# Split into individual columns and remove position indicators
|
|
|
33 |
max_players = int(df['Lineup'].str.split(',').str.len().max())
|
34 |
|
35 |
if max_players <= 0:
|
@@ -39,8 +41,8 @@ def load_contest_file(upload, sport):
|
|
39 |
# Create columns for each player
|
40 |
for i in range(1, max_players):
|
41 |
df[i] = df['Lineup'].str.split(',').str[i].str.strip()
|
42 |
-
#
|
43 |
-
df[i] = df[i].str.replace(r'\
|
44 |
|
45 |
if sport == 'MLB':
|
46 |
df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})
|
|
|
26 |
df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
|
27 |
|
28 |
# Split the lineup string by replacing position indicators with commas
|
29 |
+
# We need to ensure we only replace position indicators that are at the start of a player entry
|
30 |
+
# and not those that might appear within player names
|
31 |
+
df['Lineup'] = df['Lineup'].str.replace(r'\b(' + '|'.join(pos_values) + r')\b', r'\1,', regex=True)
|
32 |
|
33 |
# Split into individual columns and remove position indicators
|
34 |
+
# First, determine the maximum number of players in any lineup
|
35 |
max_players = int(df['Lineup'].str.split(',').str.len().max())
|
36 |
|
37 |
if max_players <= 0:
|
|
|
41 |
# Create columns for each player
|
42 |
for i in range(1, max_players):
|
43 |
df[i] = df['Lineup'].str.split(',').str[i].str.strip()
|
44 |
+
# Remove position indicators from the end of each entry
|
45 |
+
df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
|
46 |
|
47 |
if sport == 'MLB':
|
48 |
df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})
|