James McCool committed on
Commit
c283108
·
1 Parent(s): dd908a8

Enhance player selection handling in predict_dupes function

Browse files

This update improves the player selection process by ensuring only valid string values are added to the unique player set. Additionally, it introduces a check to return a zero vector if no valid players are found, enhancing robustness. The player_columns are dynamically defined based on the portfolio structure, ensuring accurate processing of player data across different lineup configurations.

Files changed (1) hide show
  1. global_func/predict_dupes.py +24 -3
global_func/predict_dupes.py CHANGED
@@ -56,9 +56,19 @@ def calculate_player_similarity_score(portfolio, player_columns):
56
  # Get all unique players across all lineups
57
  all_players = set()
58
  for col in player_columns:
59
- all_players.update(player_data[col].unique())
 
 
 
 
 
 
60
  all_players = sorted(list(all_players))
61
 
 
 
 
 
62
  # Create a binary matrix: 1 if player is in lineup, 0 if not
63
  binary_matrix = np.zeros((len(portfolio), len(all_players)))
64
 
@@ -115,7 +125,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
115
  dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank']
116
  own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own']
117
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
 
118
  player_columns = [col for col in portfolio.columns[:5] if col not in ['salary', 'median', 'Own']]
 
119
  flex_ownerships = pd.concat([
120
  portfolio.iloc[:,1].map(maps_dict['own_map']),
121
  portfolio.iloc[:,2].map(maps_dict['own_map']),
@@ -157,7 +169,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
157
  dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
158
  own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
159
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
 
160
  player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
 
161
  for i in range(1, num_players + 1):
162
  portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
163
  portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
@@ -181,7 +195,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
181
  dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
182
  own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
183
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
 
184
  player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
 
185
  flex_ownerships = pd.concat([
186
  portfolio.iloc[:,1].map(maps_dict['own_map']),
187
  portfolio.iloc[:,2].map(maps_dict['own_map']),
@@ -226,7 +242,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
226
  dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
227
  own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
228
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
 
229
  player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
 
230
  flex_ownerships = pd.concat([
231
  portfolio.iloc[:,1].map(maps_dict['own_map']),
232
  portfolio.iloc[:,2].map(maps_dict['own_map']),
@@ -271,7 +289,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
271
  dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
272
  own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
273
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
 
274
  player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
 
275
  for i in range(1, num_players + 1):
276
  portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
277
  portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
@@ -329,9 +349,10 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
329
  portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
330
  portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership, axis=1)
331
  portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))
332
-
 
333
  portfolio['Similarity Score'] = calculate_player_similarity_score(portfolio, player_columns)
334
-
335
  portfolio = portfolio.drop(columns=dup_count_columns)
336
  portfolio = portfolio.drop(columns=own_columns)
337
  portfolio = portfolio.drop(columns=calc_columns)
 
56
  # Get all unique players across all lineups
57
  all_players = set()
58
  for col in player_columns:
59
+ # Only add string values (player names), skip numeric values
60
+ unique_vals = player_data[col].unique()
61
+ for val in unique_vals:
62
+ if isinstance(val, str) and val.strip() != '':
63
+ all_players.add(val)
64
+
65
+ # Convert to sorted list
66
  all_players = sorted(list(all_players))
67
 
68
+ # If no valid players found, return zeros
69
+ if len(all_players) == 0:
70
+ return np.zeros(len(portfolio))
71
+
72
  # Create a binary matrix: 1 if player is in lineup, 0 if not
73
  binary_matrix = np.zeros((len(portfolio), len(all_players)))
74
 
 
125
  dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank']
126
  own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own']
127
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
128
+ # Get the original player columns (first 5 columns excluding salary, median, Own)
129
  player_columns = [col for col in portfolio.columns[:5] if col not in ['salary', 'median', 'Own']]
130
+
131
  flex_ownerships = pd.concat([
132
  portfolio.iloc[:,1].map(maps_dict['own_map']),
133
  portfolio.iloc[:,2].map(maps_dict['own_map']),
 
169
  dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
170
  own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
171
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
172
+ # Get the original player columns (first num_players columns excluding salary, median, Own)
173
  player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
174
+
175
  for i in range(1, num_players + 1):
176
  portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
177
  portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
 
195
  dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
196
  own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
197
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
198
+ # Get the original player columns (first 6 columns excluding salary, median, Own)
199
  player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
200
+
201
  flex_ownerships = pd.concat([
202
  portfolio.iloc[:,1].map(maps_dict['own_map']),
203
  portfolio.iloc[:,2].map(maps_dict['own_map']),
 
242
  dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
243
  own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
244
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
245
+ # Get the original player columns (first 6 columns excluding salary, median, Own)
246
  player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
247
+
248
  flex_ownerships = pd.concat([
249
  portfolio.iloc[:,1].map(maps_dict['own_map']),
250
  portfolio.iloc[:,2].map(maps_dict['own_map']),
 
289
  dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
290
  own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
291
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
292
+ # Get the original player columns (first num_players columns excluding salary, median, Own)
293
  player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
294
+
295
  for i in range(1, num_players + 1):
296
  portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
297
  portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
 
349
  portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
350
  portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership, axis=1)
351
  portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))
352
+
353
+ # Calculate similarity score based on actual player selection
354
  portfolio['Similarity Score'] = calculate_player_similarity_score(portfolio, player_columns)
355
+
356
  portfolio = portfolio.drop(columns=dup_count_columns)
357
  portfolio = portfolio.drop(columns=own_columns)
358
  portfolio = portfolio.drop(columns=calc_columns)