James McCool
committed on
Commit
·
c283108
1
Parent(s):
dd908a8
Enhance player selection handling in predict_dupes function
Browse files
This update improves the player selection process by ensuring only valid string values are added to the unique player set. Additionally, it introduces a check to return a zero vector if no valid players are found, enhancing robustness. The player_columns are dynamically defined based on the portfolio structure, ensuring accurate processing of player data across different lineup configurations.
- global_func/predict_dupes.py +24 -3
global_func/predict_dupes.py
CHANGED
@@ -56,9 +56,19 @@ def calculate_player_similarity_score(portfolio, player_columns):
|
|
56 |
# Get all unique players across all lineups
|
57 |
all_players = set()
|
58 |
for col in player_columns:
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
all_players = sorted(list(all_players))
|
61 |
|
|
|
|
|
|
|
|
|
62 |
# Create a binary matrix: 1 if player is in lineup, 0 if not
|
63 |
binary_matrix = np.zeros((len(portfolio), len(all_players)))
|
64 |
|
@@ -115,7 +125,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
115 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank']
|
116 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own']
|
117 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
|
|
118 |
player_columns = [col for col in portfolio.columns[:5] if col not in ['salary', 'median', 'Own']]
|
|
|
119 |
flex_ownerships = pd.concat([
|
120 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
121 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
@@ -157,7 +169,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
157 |
dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
|
158 |
own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
|
159 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
|
|
160 |
player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
|
|
|
161 |
for i in range(1, num_players + 1):
|
162 |
portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
|
163 |
portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
|
@@ -181,7 +195,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
181 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
|
182 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
|
183 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
|
|
184 |
player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
|
|
|
185 |
flex_ownerships = pd.concat([
|
186 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
187 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
@@ -226,7 +242,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
226 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
|
227 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
|
228 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
|
|
229 |
player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
|
|
|
230 |
flex_ownerships = pd.concat([
|
231 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
232 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
@@ -271,7 +289,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
271 |
dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
|
272 |
own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
|
273 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
|
|
274 |
player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
|
|
|
275 |
for i in range(1, num_players + 1):
|
276 |
portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
|
277 |
portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
|
@@ -329,9 +349,10 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
329 |
portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
|
330 |
portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership, axis=1)
|
331 |
portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))
|
332 |
-
|
|
|
333 |
portfolio['Similarity Score'] = calculate_player_similarity_score(portfolio, player_columns)
|
334 |
-
|
335 |
portfolio = portfolio.drop(columns=dup_count_columns)
|
336 |
portfolio = portfolio.drop(columns=own_columns)
|
337 |
portfolio = portfolio.drop(columns=calc_columns)
|
|
|
56 |
# Get all unique players across all lineups
|
57 |
all_players = set()
|
58 |
for col in player_columns:
|
59 |
+
# Only add string values (player names), skip numeric values
|
60 |
+
unique_vals = player_data[col].unique()
|
61 |
+
for val in unique_vals:
|
62 |
+
if isinstance(val, str) and val.strip() != '':
|
63 |
+
all_players.add(val)
|
64 |
+
|
65 |
+
# Convert to sorted list
|
66 |
all_players = sorted(list(all_players))
|
67 |
|
68 |
+
# If no valid players found, return zeros
|
69 |
+
if len(all_players) == 0:
|
70 |
+
return np.zeros(len(portfolio))
|
71 |
+
|
72 |
# Create a binary matrix: 1 if player is in lineup, 0 if not
|
73 |
binary_matrix = np.zeros((len(portfolio), len(all_players)))
|
74 |
|
|
|
125 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank']
|
126 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own']
|
127 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
128 |
+
# Get the original player columns (first 5 columns excluding salary, median, Own)
|
129 |
player_columns = [col for col in portfolio.columns[:5] if col not in ['salary', 'median', 'Own']]
|
130 |
+
|
131 |
flex_ownerships = pd.concat([
|
132 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
133 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
|
|
169 |
dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
|
170 |
own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
|
171 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
172 |
+
# Get the original player columns (first num_players columns excluding salary, median, Own)
|
173 |
player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
|
174 |
+
|
175 |
for i in range(1, num_players + 1):
|
176 |
portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
|
177 |
portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
|
|
|
195 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
|
196 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
|
197 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
198 |
+
# Get the original player columns (first 6 columns excluding salary, median, Own)
|
199 |
player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
|
200 |
+
|
201 |
flex_ownerships = pd.concat([
|
202 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
203 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
|
|
242 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
|
243 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
|
244 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
245 |
+
# Get the original player columns (first 6 columns excluding salary, median, Own)
|
246 |
player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
|
247 |
+
|
248 |
flex_ownerships = pd.concat([
|
249 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
250 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
|
|
289 |
dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
|
290 |
own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
|
291 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
292 |
+
# Get the original player columns (first num_players columns excluding salary, median, Own)
|
293 |
player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
|
294 |
+
|
295 |
for i in range(1, num_players + 1):
|
296 |
portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
|
297 |
portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
|
|
|
349 |
portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
|
350 |
portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership, axis=1)
|
351 |
portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))
|
352 |
+
|
353 |
+
# Calculate similarity score based on actual player selection
|
354 |
portfolio['Similarity Score'] = calculate_player_similarity_score(portfolio, player_columns)
|
355 |
+
|
356 |
portfolio = portfolio.drop(columns=dup_count_columns)
|
357 |
portfolio = portfolio.drop(columns=own_columns)
|
358 |
portfolio = portfolio.drop(columns=calc_columns)
|