James McCool committed on
Commit 46a28f1 · 1 Parent(s): dd94c84

Refactor predict_dupes.py to implement vectorized calculations for ownership and similarity scores, improving performance. Introduce new functions for weighted ownership and player similarity while maintaining backward compatibility. Update data type handling for portfolio results to optimize memory usage.
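The core pattern behind the commit is replacing per-row Pandas .apply() calls with single whole-array NumPy operations. A minimal sketch of the idea (hypothetical data, not code from this repo):

    import numpy as np
    import pandas as pd

    own = pd.DataFrame(np.random.uniform(1, 40, size=(10000, 6)))   # ownership percentages
    slow = own.apply(lambda r: r.mean(), axis=1)                    # one Python call per row
    fast = np.mean(own.to_numpy(), axis=1)                          # one call over the whole 2D array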

Files changed (1):
  global_func/predict_dupes.py  +290 -83

global_func/predict_dupes.py CHANGED
@@ -5,59 +5,64 @@ import time
 import math
 from difflib import SequenceMatcher
 
-def calculate_weighted_ownership(row_ownerships):
     """
-    Calculate weighted ownership based on the formula:
-    (AVERAGE of (each value's average with overall average)) * count - (max - min)
 
     Args:
-        row_ownerships: Series containing ownership values in percentage form (e.g., 24.2213 for 24.2213%)
 
     Returns:
-        float: Calculated weighted ownership value
     """
-    # Drop NaN values and convert percentages to decimals
-    row_ownerships = row_ownerships.dropna() / 100
 
-    # Get the mean of all ownership values
-    row_mean = row_ownerships.mean()
 
     # Calculate average of each value with the overall mean
-    value_means = [(val + row_mean) / 2 for val in row_ownerships]
 
     # Take average of all those means
-    avg_of_means = sum(value_means) / len(row_ownerships)
 
     # Multiply by count of values
-    weighted = avg_of_means * (len(row_ownerships) * 1)
 
-    # Subtract (max - min)
-    weighted = weighted - (row_ownerships.max() - row_ownerships.min())
 
-    # Convert back to percentage form to match input format
     return weighted * 10000
 
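For reference, the formula the removed function implements, worked through on hypothetical inputs:

    import pandas as pd

    row = pd.Series([20.0, 30.0]) / 100                            # ownership % -> decimals
    mean = row.mean()                                              # 0.25
    avg_of_means = ((row + mean) / 2).mean()                       # mean of [0.225, 0.275] = 0.25
    weighted = avg_of_means * len(row) - (row.max() - row.min())   # 0.5 - 0.1 = 0.4
    print(weighted * 10000)                                        # 4000.0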
-def calculate_player_similarity_score(portfolio, player_columns):
     """
-    Calculate a similarity score that measures how different each row is from all other rows
-    based on actual player selection. Optimized for speed using vectorized operations.
-    Higher scores indicate more unique/different lineups.
 
     Args:
-        portfolio: DataFrame containing the portfolio data
-        player_columns: List of column names containing player names
 
     Returns:
-        Series: Similarity scores for each row
     """
-    # Extract player data
-    player_data = portfolio[player_columns].fillna('')
 
     # Get all unique players and create a mapping to numeric IDs
     all_players = set()
-    for col in player_columns:
-        unique_vals = player_data[col].unique()
-        for val in unique_vals:
             if isinstance(val, str) and val.strip() != '':
                 all_players.add(val)
 
@@ -69,46 +74,207 @@ def calculate_player_similarity_score(portfolio, player_columns):
     n_rows = len(portfolio)
     binary_matrix = np.zeros((n_rows, n_players), dtype=np.int8)
 
-    for i, (_, row) in enumerate(player_data.iterrows()):
-        for val in row.values:
             if isinstance(val, str) and str(val).strip() != '' and str(val) in player_to_id:
                 binary_matrix[i, player_to_id[str(val)]] = 1
 
     # Vectorized Jaccard distance calculation
-    # Use matrix operations to compute all pairwise distances at once
-    similarity_scores = np.zeros(n_rows)
-
-    # Compute intersection and union matrices
-    # intersection[i,j] = number of players in common between row i and row j
-    # union[i,j] = total number of unique players between row i and row j
     intersection_matrix = np.dot(binary_matrix, binary_matrix.T)
-
-    # For union, we need: |A ∪ B| = |A| + |B| - |A ∩ B|
     row_sums = np.sum(binary_matrix, axis=1)
     union_matrix = row_sums[:, np.newaxis] + row_sums - intersection_matrix
 
     # Calculate Jaccard distance: 1 - (intersection / union)
-    # Avoid division by zero
     with np.errstate(divide='ignore', invalid='ignore'):
         jaccard_similarity = np.divide(intersection_matrix, union_matrix,
                                        out=np.zeros_like(intersection_matrix, dtype=float),
                                        where=union_matrix != 0)
 
-    # Convert similarity to distance and calculate average distance for each row
     jaccard_distance = 1 - jaccard_similarity
 
-    # For each row, calculate average distance to all other rows
-    # Exclude self-comparison (diagonal elements)
     np.fill_diagonal(jaccard_distance, 0)
-    row_counts = n_rows - 1  # Exclude self
     similarity_scores = np.sum(jaccard_distance, axis=1) / row_counts
 
-    # Normalize to 0-1 scale where 1 = most unique/different
-    if similarity_scores.max() > similarity_scores.min():
-        similarity_scores = (similarity_scores - similarity_scores.min()) / (similarity_scores.max() - similarity_scores.min())
 
     return similarity_scores
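A quick illustration of the intersection/union identity used above, on a hypothetical three-lineup matrix:

    import numpy as np

    B = np.array([[1, 1, 0, 0],    # lineup A: players 0, 1
                  [1, 0, 1, 0],    # lineup B: players 0, 2
                  [0, 0, 1, 1]])   # lineup C: players 2, 3
    inter = B @ B.T                                   # inter[i, j] = |i ∩ j|
    union = B.sum(1)[:, None] + B.sum(1) - inter      # |A| + |B| - |A ∩ B|
    print(1 - inter / union)                          # A vs B -> 1 - 1/3 ≈ 0.667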
 def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var):
     if strength_var == 'Weak':
         dupes_multiplier = .75
@@ -143,12 +309,14 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
             portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
             portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
             portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
 
-            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
-            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
-            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
-            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
-            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100
 
             portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
             portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
@@ -175,7 +343,7 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
 
             for i in range(1, num_players + 1):
                 portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
-                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
 
             portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
             portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
@@ -193,35 +361,63 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
 
     elif site_var == 'Draftkings':
         if type_var == 'Showdown':
-            dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
-            own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
             calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
             # Get the original player columns (first 6 columns excluding salary, median, Own)
             player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
-
-            flex_ownerships = pd.concat([
-                portfolio.iloc[:,1].map(maps_dict['own_map']),
-                portfolio.iloc[:,2].map(maps_dict['own_map']),
-                portfolio.iloc[:,3].map(maps_dict['own_map']),
-                portfolio.iloc[:,4].map(maps_dict['own_map']),
-                portfolio.iloc[:,5].map(maps_dict['own_map'])
-            ])
             flex_rank = flex_ownerships.rank(pct=True)
 
             # Assign ranks back to individual columns using the same rank scale
-            portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
-            portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
 
-            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
-            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
-            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
-            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
-            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100
-            portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']) / 100
 
             portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
             portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
@@ -263,12 +459,12 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
             portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
             portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
 
-            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
-            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
-            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
-            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
-            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100
-            portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']) / 100
 
             portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
             portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
@@ -295,7 +491,7 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
 
             for i in range(1, num_players + 1):
                 portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
-                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
 
             portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
             portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
@@ -348,14 +544,25 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
     portfolio['Lineup Edge'] = portfolio['Win%'] * ((.5 - portfolio['Finish_percentile']) * (Contest_Size / 2.5))
     portfolio['Lineup Edge'] = portfolio.apply(lambda row: row['Lineup Edge'] / (row['Dupes'] + 1) if row['Dupes'] > 0 else row['Lineup Edge'], axis=1)
     portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
-    portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership, axis=1)
     portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))
 
     # Calculate similarity score based on actual player selection
-    portfolio['Diversity'] = calculate_player_similarity_score(portfolio, player_columns)
 
     portfolio = portfolio.drop(columns=dup_count_columns)
     portfolio = portfolio.drop(columns=own_columns)
     portfolio = portfolio.drop(columns=calc_columns)
 
-    return portfolio
 import math
 from difflib import SequenceMatcher
 
+def calculate_weighted_ownership_vectorized(ownership_array):
     """
+    Vectorized version of calculate_weighted_ownership using NumPy operations.
 
     Args:
+        ownership_array: 2D array of ownership values (rows x players)
 
     Returns:
+        array: Calculated weighted ownership values for each row
     """
+    # Convert percentages to decimals and handle NaN values
+    ownership_array = np.where(np.isnan(ownership_array), 0, ownership_array) / 100
 
+    # Calculate row means
+    row_means = np.mean(ownership_array, axis=1, keepdims=True)
 
     # Calculate average of each value with the overall mean
+    value_means = (ownership_array + row_means) / 2
 
     # Take average of all those means
+    avg_of_means = np.mean(value_means, axis=1)
 
     # Multiply by count of values
+    weighted = avg_of_means * ownership_array.shape[1]
 
+    # Subtract (max - min) for each row
+    row_max = np.max(ownership_array, axis=1)
+    row_min = np.min(ownership_array, axis=1)
+    weighted = weighted - (row_max - row_min)
 
+    # Convert back to percentage form
     return weighted * 10000
 
+def calculate_weighted_ownership_wrapper(row_ownerships):
     """
+    Wrapper around calculate_weighted_ownership_vectorized so row-wise Pandas .apply() keeps working.
 
     Args:
+        row_ownerships: Series containing ownership values in percentage form
 
     Returns:
+        float: Calculated weighted ownership value
+    """
+    # Convert Series to 2D array for vectorized function
+    ownership_array = row_ownerships.values.reshape(1, -1)
+    return calculate_weighted_ownership_vectorized(ownership_array)[0]
+
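One behavioral note: np.where(np.isnan(...), 0, ...) is not a drop-in for the removed dropna() — zeroed entries still count toward shape[1] and can become the row minimum. If the original dropna() semantics were wanted, a NaN-aware sketch (an assumption, not part of this commit) could look like:

    import numpy as np

    def weighted_ownership_nan_aware(ownership_array):
        # Assumes each row has at least one non-NaN value
        arr = np.asarray(ownership_array, dtype=float) / 100
        counts = np.sum(~np.isnan(arr), axis=1)            # matches len() after dropna()
        row_means = np.nanmean(arr, axis=1, keepdims=True)
        avg_of_means = np.nanmean((arr + row_means) / 2, axis=1)
        spread = np.nanmax(arr, axis=1) - np.nanmin(arr, axis=1)
        return (avg_of_means * counts - spread) * 10000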
+def calculate_player_similarity_score_vectorized(portfolio, player_columns):
     """
+    Vectorized version of calculate_player_similarity_score using NumPy operations.
+    """
+    # Extract player data and convert to string array
+    player_data = portfolio[player_columns].astype(str).fillna('').values
 
     # Get all unique players and create a mapping to numeric IDs
     all_players = set()
+    for row in player_data:
+        for val in row:
             if isinstance(val, str) and val.strip() != '':
                 all_players.add(val)
 
     n_rows = len(portfolio)
     binary_matrix = np.zeros((n_rows, n_players), dtype=np.int8)
 
+    # Build the binary lineup-by-player matrix
+    for i, row in enumerate(player_data):
+        for val in row:
             if isinstance(val, str) and str(val).strip() != '' and str(val) in player_to_id:
                 binary_matrix[i, player_to_id[str(val)]] = 1
 
     # Vectorized Jaccard distance calculation
     intersection_matrix = np.dot(binary_matrix, binary_matrix.T)
     row_sums = np.sum(binary_matrix, axis=1)
     union_matrix = row_sums[:, np.newaxis] + row_sums - intersection_matrix
 
     # Calculate Jaccard distance: 1 - (intersection / union)
     with np.errstate(divide='ignore', invalid='ignore'):
         jaccard_similarity = np.divide(intersection_matrix, union_matrix,
                                        out=np.zeros_like(intersection_matrix, dtype=float),
                                        where=union_matrix != 0)
 
     jaccard_distance = 1 - jaccard_similarity
 
+    # Exclude self-comparison and calculate average distance for each row
     np.fill_diagonal(jaccard_distance, 0)
+    row_counts = n_rows - 1
     similarity_scores = np.sum(jaccard_distance, axis=1) / row_counts
 
+    # Normalize to 0-1 scale
+    score_range = similarity_scores.max() - similarity_scores.min()
+    if score_range > 0:
+        similarity_scores = (similarity_scores - similarity_scores.min()) / score_range
 
     return similarity_scores
 
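As a cross-check, the same average Jaccard distance can be reproduced with SciPy's pdist (illustrative only; this module does not depend on scipy):

    import numpy as np
    from scipy.spatial.distance import pdist, squareform

    binary_matrix = np.array([[1, 1, 0, 0],
                              [1, 0, 1, 0],
                              [0, 0, 1, 1]], dtype=bool)
    dist = squareform(pdist(binary_matrix, metric='jaccard'))   # pairwise Jaccard distances
    avg_dist = dist.sum(axis=1) / (len(binary_matrix) - 1)      # average distance to the other rows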
+def predict_dupes_vectorized(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var):
+    """
+    Vectorized version of predict_dupes using NumPy arrays for better performance.
+    """
+    # Set multipliers based on strength
+    if strength_var == 'Weak':
+        dupes_multiplier = 0.75
+        percentile_multiplier = 0.90
+    elif strength_var == 'Average':
+        dupes_multiplier = 1.00
+        percentile_multiplier = 1.00
+    elif strength_var == 'Sharp':
+        dupes_multiplier = 1.25
+        percentile_multiplier = 1.10
+
+    max_ownership = max(maps_dict['own_map'].values()) / 100
+    average_ownership = np.mean(list(maps_dict['own_map'].values())) / 100
+
+    # Convert portfolio to NumPy arrays for faster operations
+    portfolio_values = portfolio.values
+    n_rows = len(portfolio)
+
+    # Pre-allocate arrays for ownership data
+    if site_var == 'Fanduel':
+        if type_var == 'Showdown':
+            num_players = 5
+            salary_cap = 60000
+            player_cols = list(range(5))  # First 5 columns are players
+        elif type_var == 'Classic':
+            if sport_var == 'WNBA':
+                num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
+                salary_cap = 40000
+                player_cols = list(range(num_players))
+            else:
+                num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
+                salary_cap = 60000
+                player_cols = list(range(num_players))
+    elif site_var == 'Draftkings':
+        if type_var == 'Showdown':
+            num_players = 6
+            salary_cap = 50000
+            player_cols = list(range(6))
+        elif type_var == 'Classic':
+            if sport_var == 'CS2':
+                num_players = 6
+                salary_cap = 50000
+                player_cols = list(range(6))
+            else:
+                num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
+                salary_cap = 50000
+                player_cols = list(range(num_players))
+
+    # Pre-allocate ownership arrays
+    ownership_array = np.zeros((n_rows, num_players), dtype=np.float32)
+    ownership_rank_array = np.zeros((n_rows, num_players), dtype=np.float32)
+
+    # Vectorized ownership mapping
+    for i, col_idx in enumerate(player_cols):
+        if i == 0 and type_var == 'Showdown':  # Captain
+            ownership_array[:, i] = np.vectorize(lambda x: maps_dict['cpt_own_map'].get(x, 0))(portfolio_values[:, col_idx]) / 100
+            ownership_rank_array[:, i] = np.vectorize(lambda x: maps_dict['cpt_own_map'].get(x, 0))(portfolio_values[:, col_idx])
+        else:  # Flex players
+            ownership_array[:, i] = np.vectorize(lambda x: maps_dict['own_map'].get(x, 0))(portfolio_values[:, col_idx]) / 100
+            ownership_rank_array[:, i] = np.vectorize(lambda x: maps_dict['own_map'].get(x, 0))(portfolio_values[:, col_idx])
+
+    # Calculate ranks for flex players (excluding captain)
+    if type_var == 'Showdown':
+        flex_ownerships = ownership_rank_array[:, 1:].flatten()
+        flex_rank = pd.Series(flex_ownerships).rank(pct=True).values.reshape(n_rows, -1)
+        ownership_rank_array[:, 1:] = flex_rank
+
+    # Convert to percentile ranks
+    ownership_rank_array = ownership_rank_array / 100
+
+    # Vectorized calculations
+    own_product = np.prod(ownership_array, axis=1)
+    own_average = (portfolio_values[:, portfolio.columns.get_loc('Own')].max() * 0.33) / 100
+    own_sum = np.sum(ownership_array, axis=1)
+    avg_own_rank = np.mean(ownership_rank_array, axis=1)
+
+    # Calculate dupes formula vectorized
+    salary_col = portfolio.columns.get_loc('salary')
+    own_col = portfolio.columns.get_loc('Own')
+
+    dupes_calc = (own_product * avg_own_rank) * Contest_Size + \
+                 ((portfolio_values[:, salary_col] - (salary_cap - portfolio_values[:, own_col])) / 100) - \
+                 ((salary_cap - portfolio_values[:, salary_col]) / 100)
+
+    dupes_calc *= dupes_multiplier
+
+    # Round and handle negative values
+    dupes = np.where(np.round(dupes_calc, 0) <= 0, 0, np.round(dupes_calc, 0) - 1)
+
+    # Calculate own_ratio vectorized
+    max_own_mask = np.any(ownership_array == max_ownership, axis=1)
+    own_ratio = np.where(max_own_mask,
+                         own_sum / own_average,
+                         (own_sum - max_ownership) / own_average)
+
+    # Calculate Finish_percentile vectorized
+    percentile_cut_scalar = portfolio_values[:, portfolio.columns.get_loc('median')].max()
+
+    if type_var == 'Classic':
+        own_ratio_nerf = 2 if sport_var == 'CS2' else 1.5
+    elif type_var == 'Showdown':
+        own_ratio_nerf = 1.5
+
+    median_col = portfolio.columns.get_loc('median')
+    finish_percentile = (own_ratio - own_ratio_nerf) / ((5 * (portfolio_values[:, median_col] / percentile_cut_scalar)) / 3)
+    finish_percentile = np.where(finish_percentile < 0.0005, 0.0005, finish_percentile / 2)
+
+    # Calculate other metrics vectorized
+    ref_proj = portfolio_values[:, median_col].max()
+    max_proj = ref_proj + 10
+    min_proj = ref_proj - 10
+    avg_ref = (max_proj + min_proj) / 2
+
+    win_percent = (((portfolio_values[:, median_col] / avg_ref) - (0.1 + ((ref_proj - portfolio_values[:, median_col])/100))) / (Contest_Size / 1000)) / 10
+    max_allowed_win = (1 / Contest_Size) * 5
+    win_percent = win_percent / win_percent.max() * max_allowed_win
+
+    finish_percentile = finish_percentile + 0.005 + (0.005 * (Contest_Size / 10000))
+    finish_percentile *= percentile_multiplier
+    win_percent *= (1 - finish_percentile)
+
+    # Calculate low ownership count vectorized
+    low_own_count = np.sum(ownership_array < 0.10, axis=1)
+    finish_percentile = np.where(low_own_count <= 0,
+                                 finish_percentile,
+                                 finish_percentile / low_own_count)
+
+    # Calculate Lineup Edge vectorized
+    lineup_edge = win_percent * ((0.5 - finish_percentile) * (Contest_Size / 2.5))
+    lineup_edge = np.where(dupes > 0, lineup_edge / (dupes + 1), lineup_edge)
+    lineup_edge = lineup_edge - lineup_edge.mean()
+
+    # Calculate Weighted Own vectorized
+    weighted_own = calculate_weighted_ownership_vectorized(ownership_array)
+
+    # Calculate Geomean vectorized
+    geomean = np.power(np.prod(ownership_array * 100, axis=1), 1 / num_players)
+
+    # Calculate Diversity vectorized
+    diversity = calculate_player_similarity_score_vectorized(portfolio, player_cols)
+
+    # Create result DataFrame with optimized data types
+    result_data = {
+        'Dupes': dupes.astype('uint16'),
+        'median': portfolio_values[:, portfolio.columns.get_loc('median')].astype('float32'),
+        'Own': portfolio_values[:, portfolio.columns.get_loc('Own')].astype('float32'),
+        'salary': portfolio_values[:, portfolio.columns.get_loc('salary')].astype('uint16'),
+        'Finish_percentile': finish_percentile.astype('float32'),
+        'Win%': win_percent.astype('float32'),
+        'Lineup Edge': lineup_edge.astype('float32'),
+        'Weighted Own': weighted_own.astype('float32'),
+        'Geomean': geomean.astype('float32'),
+        'Diversity': diversity.astype('float32')
+    }
+
+    # Add Size column if it exists
+    if 'Size' in portfolio.columns:
+        result_data['Size'] = portfolio_values[:, portfolio.columns.get_loc('Size')].astype('uint16')
+
+    # Add player columns back
+    for i, col_name in enumerate(portfolio.columns[:num_players]):
+        result_data[col_name] = portfolio_values[:, i]
+
+    return pd.DataFrame(result_data)
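Worth noting: np.vectorize is a Python-level loop internally, so the dict lookups above are a convenience rather than true vectorization. A pd.Series.map equivalent (a hypothetical drop-in using the same variables as the loop above) is usually faster for this kind of lookup:

    own_map = maps_dict['own_map']                      # same dict as above
    col = pd.Series(portfolio_values[:, col_idx])
    ownership_array[:, i] = col.map(own_map).fillna(0).to_numpy(dtype=np.float32) / 100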
+
+# Keep the original function for backward compatibility
 def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var):
     if strength_var == 'Weak':
         dupes_multiplier = .75
             portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
             portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
             portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
+            portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
 
+            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
+            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
+            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']).astype('float32') / 100
+            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']).astype('float32') / 100
+            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
+            portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
 
             portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
             portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
 
             for i in range(1, num_players + 1):
                 portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
+                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']).astype('float32') / 100
 
             portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
             portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
 
 
362
  elif site_var == 'Draftkings':
363
  if type_var == 'Showdown':
364
+ if sport_var == 'GOLF':
365
+ dup_count_columns = ['FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank', 'FLEX6_Own_percent_rank']
366
+ own_columns = ['FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own', 'FLEX6_Own']
367
+ else:
368
+ dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
369
+ own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
370
  calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
371
  # Get the original player columns (first 6 columns excluding salary, median, Own)
372
  player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
373
+ if sport_var == 'GOLF':
374
+ flex_ownerships = pd.concat([
375
+ portfolio.iloc[:,0].map(maps_dict['own_map']),
376
+ portfolio.iloc[:,1].map(maps_dict['own_map']),
377
+ portfolio.iloc[:,2].map(maps_dict['own_map']),
378
+ portfolio.iloc[:,3].map(maps_dict['own_map']),
379
+ portfolio.iloc[:,4].map(maps_dict['own_map']),
380
+ portfolio.iloc[:,5].map(maps_dict['own_map'])
381
+ ])
382
+ else:
383
+ flex_ownerships = pd.concat([
384
+ portfolio.iloc[:,1].map(maps_dict['own_map']),
385
+ portfolio.iloc[:,2].map(maps_dict['own_map']),
386
+ portfolio.iloc[:,3].map(maps_dict['own_map']),
387
+ portfolio.iloc[:,4].map(maps_dict['own_map']),
388
+ portfolio.iloc[:,5].map(maps_dict['own_map'])
389
+ ])
390
  flex_rank = flex_ownerships.rank(pct=True)
391
 
392
  # Assign ranks back to individual columns using the same rank scale
393
+ if sport_var == 'GOLF':
394
+ portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
395
+ portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
396
+ portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
397
+ portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
398
+ portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
399
+ portfolio['FLEX6_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
400
+
401
+ portfolio['FLEX1_Own'] = portfolio.iloc[:,0].map(maps_dict['own_map']).astype('float32') / 100
402
+ portfolio['FLEX2_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
403
+ portfolio['FLEX3_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']).astype('float32') / 100
404
+ portfolio['FLEX4_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']).astype('float32') / 100
405
+ portfolio['FLEX5_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
406
+ portfolio['FLEX6_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
407
+ else:
408
+ portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
409
+ portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
410
+ portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
411
+ portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
412
+ portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
413
+ portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
414
 
415
+ portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
416
+ portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
417
+ portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']).astype('float32') / 100
418
+ portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']).astype('float32') / 100
419
+ portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
420
+ portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
421
 
422
  portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
423
  portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
 
             portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
             portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
 
+            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).astype('float32') / 100
+            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']).astype('float32') / 100
+            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']).astype('float32') / 100
+            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']).astype('float32') / 100
+            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']).astype('float32') / 100
+            portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']).astype('float32') / 100
 
             portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
             portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
 
             for i in range(1, num_players + 1):
                 portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
+                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']).astype('float32') / 100
 
             portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
             portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
 
     portfolio['Lineup Edge'] = portfolio['Win%'] * ((.5 - portfolio['Finish_percentile']) * (Contest_Size / 2.5))
     portfolio['Lineup Edge'] = portfolio.apply(lambda row: row['Lineup Edge'] / (row['Dupes'] + 1) if row['Dupes'] > 0 else row['Lineup Edge'], axis=1)
     portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
+    portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership_wrapper, axis=1)
     portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))
 
     # Calculate similarity score based on actual player selection
+    portfolio['Diversity'] = calculate_player_similarity_score_vectorized(portfolio, player_columns)
 
     portfolio = portfolio.drop(columns=dup_count_columns)
     portfolio = portfolio.drop(columns=own_columns)
     portfolio = portfolio.drop(columns=calc_columns)
+
+    int16_columns_stacks = ['Dupes', 'Size', 'salary']
+    int16_columns_nstacks = ['Dupes', 'salary']
+    float32_columns = ['median', 'Own', 'Finish_percentile', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
+
+    try:
+        portfolio[int16_columns_stacks] = portfolio[int16_columns_stacks].astype('uint16')
+    except:
+        portfolio[int16_columns_nstacks] = portfolio[int16_columns_nstacks].astype('uint16')
+
+    portfolio[float32_columns] = portfolio[float32_columns].astype('float32')
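The bare except above silently falls back to the list without 'Size'; a narrower guard (a sketch, assuming 'Size' is the only optionally present column) would avoid masking unrelated casting errors:

    int16_columns = ['Dupes', 'salary'] + (['Size'] if 'Size' in portfolio.columns else [])
    portfolio[int16_columns] = portfolio[int16_columns].astype('uint16')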
+
+    return portfolio