James McCool committed on
Commit
448fa4e
·
1 Parent(s): f76b524

Optimize performance in app.py with vectorized operations

Browse files

- Refactored multiple calculations in the app to utilize vectorized operations, significantly improving performance and efficiency. This includes stack, salary, actual_fpts, and ownership calculations, as well as unique player counts. Enhanced readability and maintainability of the code by consolidating repetitive logic.

Files changed (1) hide show
  1. app.py +113 -86
app.py CHANGED
@@ -204,6 +204,8 @@ with tab2:
204
  st.session_state['player_columns'] = [col for col in st.session_state['Contest'].columns if col not in excluded_cols]
205
  st.session_state['stack_columns'] = [col for col in st.session_state['Contest'].columns if col not in exclude_stacks]
206
  print(st.session_state['player_columns'])
 
 
207
  for col in st.session_state['player_columns']:
208
  st.session_state['Contest'][col] = st.session_state['Contest'][col].astype(str).str.strip()
209
 
@@ -215,121 +217,146 @@ with tab2:
215
  'own_map': st.session_state['ownership_dict'],
216
  'own_percent_rank': dict(zip(st.session_state['ownership_df']['Player'], st.session_state['ownership_df']['Own'].rank(pct=True)))
217
  }
 
218
  # Create a copy of the dataframe for calculations
219
  working_df = st.session_state['Contest'].copy()
220
-
 
 
 
 
 
 
221
  if type_var == 'Classic':
222
- working_df['stack'] = working_df.apply(
223
- lambda row: Counter(
224
- st.session_state['map_dict']['team_map'].get(player, '') for player in row[st.session_state['stack_columns']]
225
- if st.session_state['map_dict']['team_map'].get(player, '') != ''
226
- ).most_common(1)[0][0] if any(st.session_state['map_dict']['team_map'].get(player, '') for player in row[st.session_state['stack_columns']]) else '',
227
- axis=1
228
  )
229
- working_df['stack_size'] = working_df.apply(
230
- lambda row: Counter(
231
- st.session_state['map_dict']['team_map'].get(player, '') for player in row[st.session_state['stack_columns']]
232
- if st.session_state['map_dict']['team_map'].get(player, '') != ''
233
- ).most_common(1)[0][1] if any(st.session_state['map_dict']['team_map'].get(player, '') for player in row[st.session_state['stack_columns']]) else '',
234
- axis=1
 
 
 
 
 
 
 
 
 
 
 
 
235
  )
236
- working_df['salary'] = working_df.apply(lambda row: sum(st.session_state['salary_dict'].get(player, 0) for player in row[st.session_state['player_columns']]), axis=1)
237
- working_df['actual_fpts'] = working_df.apply(lambda row: sum(st.session_state['actual_dict'].get(player, 0) for player in row[st.session_state['player_columns']]), axis=1)
238
- working_df['actual_own'] = working_df.apply(lambda row: sum(st.session_state['ownership_dict'].get(player, 0) for player in row[st.session_state['player_columns']]), axis=1)
239
- print("Sample row values:")
240
- print(working_df.iloc[0][st.session_state['player_columns']])
241
- print("Sample salary calculation:")
242
- sample_row = working_df.iloc[0]
243
- sample_salary = sum(st.session_state['salary_dict'].get(player, 0) for player in sample_row[st.session_state['player_columns']])
244
- print(f"Sample salary: {sample_salary}")
245
- print("Individual player salaries:")
246
- for player in sample_row[st.session_state['player_columns']]:
247
- salary = st.session_state['salary_dict'].get(player, 0)
248
- print(f" {player}: {salary}")
 
 
249
  working_df['sorted'] = working_df[st.session_state['player_columns']].apply(
250
- lambda row: ','.join(sorted(row.values)),
251
- axis=1
252
  )
253
  working_df['dupes'] = working_df.groupby('sorted').transform('size')
254
-
255
- working_df['uniques'] = working_df.groupby('BaseName').apply(
256
- lambda x: (x['dupes'] == 1).sum()
257
- ).reindex(working_df['BaseName']).values
258
-
259
- working_df['under_5'] = working_df.groupby('BaseName').apply(
260
- lambda x: (x['dupes'] <= 5).sum()
261
- ).reindex(working_df['BaseName']).values
262
-
263
- working_df['under_10'] = working_df.groupby('BaseName').apply(
264
- lambda x: (x['dupes'] <= 10).sum()
265
- ).reindex(working_df['BaseName']).values
266
-
267
  working_df = working_df.reset_index()
268
  working_df['percentile_finish'] = working_df['index'].rank(pct=True)
269
  working_df['finish'] = working_df['index']
270
  working_df = working_df.drop(['sorted', 'index'], axis=1)
271
 
272
  elif type_var == 'Showdown':
273
- working_df['stack'] = working_df.apply(
274
- lambda row: Counter(
275
- st.session_state['map_dict']['team_map'].get(player, '') for player in row[2:]
276
- if st.session_state['map_dict']['team_map'].get(player, '') != ''
277
- ).most_common(1)[0][0] if any(st.session_state['map_dict']['team_map'].get(player, '') for player in row[2:]) else '',
278
- axis=1
279
- )
280
- working_df['stack_size'] = working_df.apply(
281
- lambda row: Counter(
282
- st.session_state['map_dict']['team_map'].get(player, '') for player in row[2:]
283
- if st.session_state['map_dict']['team_map'].get(player, '') != ''
284
- ).most_common(1)[0][1] if any(st.session_state['map_dict']['team_map'].get(player, '') for player in row[2:]) else '',
285
- axis=1
286
  )
 
 
 
 
 
287
  if sport_select == 'GOLF':
288
- working_df['salary'] = working_df.apply(lambda row: sum(st.session_state['salary_dict'].get(player, 0) for player in row), axis=1)
289
- working_df['actual_fpts'] = working_df.apply(lambda row: sum(st.session_state['actual_dict'].get(player, 0) for player in row), axis=1)
290
- else:
291
- # Modified salary calculation with 1.5x multiplier for first player
292
- working_df['salary'] = working_df.apply(
293
- lambda row: (st.session_state['map_dict']['salary_map'].get(row[2], 0) * 1.5) +
294
- sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row[3:]),
295
- axis=1
296
  )
297
- # Modified actual_fpts calculation with 1.5x multiplier for first player
298
- working_df['actual_fpts'] = working_df.apply(
299
- lambda row: (st.session_state['actual_dict'].get(row[2], 0) * 1.5) +
300
- sum(st.session_state['actual_dict'].get(player, 0) for player in row[3:]),
301
- axis=1
302
  )
303
- working_df['actual_own'] = working_df.apply(lambda row: sum(st.session_state['ownership_dict'].get(player, 0) for player in row), axis=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  working_df['sorted'] = working_df[st.session_state['player_columns']].apply(
305
- lambda row: ','.join(sorted(row.values)),
306
- axis=1
307
  )
308
  working_df['dupes'] = working_df.groupby('sorted').transform('size')
309
-
310
- working_df['uniques'] = working_df.groupby('BaseName').apply(
311
- lambda x: (x['dupes'] == 1).sum()
312
- ).reindex(working_df['BaseName']).values
313
-
314
- working_df['under_5'] = working_df.groupby('BaseName').apply(
315
- lambda x: (x['dupes'] <= 5).sum()
316
- ).reindex(working_df['BaseName']).values
317
-
318
- working_df['under_10'] = working_df.groupby('BaseName').apply(
319
- lambda x: (x['dupes'] <= 10).sum()
320
- ).reindex(working_df['BaseName']).values
321
-
322
  working_df = working_df.reset_index()
323
  working_df['percentile_finish'] = working_df['index'].rank(pct=True)
324
  working_df['finish'] = working_df['index']
325
  working_df = working_df.drop(['sorted', 'index'], axis=1)
326
- # working_df['stack_size'] = working_df['stack_size'].fillna(1).astype(int)
 
327
  st.session_state['field_player_frame'] = create_player_exposures(working_df, st.session_state['player_columns'])
328
  st.session_state['field_stack_frame'] = create_stack_exposures(working_df)
329
  st.session_state['display_contest_info'] = working_df.copy()
330
  st.session_state['contest_info_reset'] = working_df.copy()
331
  st.session_state['unique_players'] = pd.unique(st.session_state['display_contest_info'][st.session_state['player_columns']].values.ravel('K'))
332
- st.session_state['unique_players'] = [p for p in st.session_state['unique_players'] if p != 'nan'] # Remove any NaN values
333
 
334
  if 'display_contest_info' in st.session_state:
335
  with st.expander("Info and filters"):
@@ -354,7 +381,7 @@ with tab2:
354
  st.session_state['remove_names'] = []
355
  st.session_state['display_contest_info'] = st.session_state['contest_info_reset'].copy()
356
  st.session_state['unique_players'] = pd.unique(st.session_state['display_contest_info'][st.session_state['player_columns']].values.ravel('K'))
357
- st.session_state['unique_players'] = [p for p in st.session_state['unique_players'] if p != 'nan'] # Remove any NaN values
358
 
359
  with st.form(key='filter_form'):
360
  users_var, entries_var, stack_var, stack_size_var, player_var, remove_var = st.columns(6)
 
204
  st.session_state['player_columns'] = [col for col in st.session_state['Contest'].columns if col not in excluded_cols]
205
  st.session_state['stack_columns'] = [col for col in st.session_state['Contest'].columns if col not in exclude_stacks]
206
  print(st.session_state['player_columns'])
207
+
208
+ # Vectorized string operations
209
  for col in st.session_state['player_columns']:
210
  st.session_state['Contest'][col] = st.session_state['Contest'][col].astype(str).str.strip()
211
 
 
217
  'own_map': st.session_state['ownership_dict'],
218
  'own_percent_rank': dict(zip(st.session_state['ownership_df']['Player'], st.session_state['ownership_df']['Own'].rank(pct=True)))
219
  }
220
+
221
  # Create a copy of the dataframe for calculations
222
  working_df = st.session_state['Contest'].copy()
223
+
224
+ # Pre-compute lookup arrays for vectorized operations
225
+ team_map = st.session_state['map_dict']['team_map']
226
+ salary_map = st.session_state['salary_dict']
227
+ actual_map = st.session_state['actual_dict']
228
+ ownership_map = st.session_state['ownership_dict']
229
+
230
  if type_var == 'Classic':
231
+ # Vectorized stack calculation
232
+ player_teams = working_df[st.session_state['stack_columns']].apply(
233
+ lambda x: x.map(team_map).fillna('')
 
 
 
234
  )
235
+
236
+ # Vectorized stack and stack_size calculation
237
+ def get_most_common_team(teams):
238
+ if teams.empty or teams.isna().all():
239
+ return '', 0
240
+ non_empty_teams = teams[teams != '']
241
+ if len(non_empty_teams) == 0:
242
+ return '', 0
243
+ team_counts = non_empty_teams.value_counts()
244
+ return team_counts.index[0], team_counts.iloc[0]
245
+
246
+ stack_results = player_teams.apply(get_most_common_team, axis=1)
247
+ working_df['stack'] = [result[0] for result in stack_results]
248
+ working_df['stack_size'] = [result[1] for result in stack_results]
249
+
250
+ # Vectorized salary calculation
251
+ player_salaries = working_df[st.session_state['player_columns']].apply(
252
+ lambda x: x.map(salary_map).fillna(0)
253
  )
254
+ working_df['salary'] = player_salaries.sum(axis=1)
255
+
256
+ # Vectorized actual_fpts calculation
257
+ player_fpts = working_df[st.session_state['player_columns']].apply(
258
+ lambda x: x.map(actual_map).fillna(0)
259
+ )
260
+ working_df['actual_fpts'] = player_fpts.sum(axis=1)
261
+
262
+ # Vectorized actual_own calculation
263
+ player_ownership = working_df[st.session_state['player_columns']].apply(
264
+ lambda x: x.map(ownership_map).fillna(0)
265
+ )
266
+ working_df['actual_own'] = player_ownership.sum(axis=1)
267
+
268
+ # Vectorized duplication calculation
269
  working_df['sorted'] = working_df[st.session_state['player_columns']].apply(
270
+ lambda row: ','.join(sorted(row.values)), axis=1
 
271
  )
272
  working_df['dupes'] = working_df.groupby('sorted').transform('size')
273
+
274
+ # Vectorized unique calculations
275
+ working_df['uniques'] = working_df.groupby('BaseName')['dupes'].transform(
276
+ lambda x: (x == 1).sum()
277
+ )
278
+ working_df['under_5'] = working_df.groupby('BaseName')['dupes'].transform(
279
+ lambda x: (x <= 5).sum()
280
+ )
281
+ working_df['under_10'] = working_df.groupby('BaseName')['dupes'].transform(
282
+ lambda x: (x <= 10).sum()
283
+ )
284
+
 
285
  working_df = working_df.reset_index()
286
  working_df['percentile_finish'] = working_df['index'].rank(pct=True)
287
  working_df['finish'] = working_df['index']
288
  working_df = working_df.drop(['sorted', 'index'], axis=1)
289
 
290
  elif type_var == 'Showdown':
291
+ # Vectorized stack calculation for Showdown
292
+ player_teams = working_df.iloc[:, 2:].apply(
293
+ lambda x: x.map(team_map).fillna('')
 
 
 
 
 
 
 
 
 
 
294
  )
295
+
296
+ stack_results = player_teams.apply(get_most_common_team, axis=1)
297
+ working_df['stack'] = [result[0] for result in stack_results]
298
+ working_df['stack_size'] = [result[1] for result in stack_results]
299
+
300
  if sport_select == 'GOLF':
301
+ # Vectorized calculations for GOLF
302
+ player_salaries = working_df.apply(
303
+ lambda x: x.map(salary_map).fillna(0)
 
 
 
 
 
304
  )
305
+ working_df['salary'] = player_salaries.sum(axis=1)
306
+
307
+ player_fpts = working_df.apply(
308
+ lambda x: x.map(actual_map).fillna(0)
 
309
  )
310
+ working_df['actual_fpts'] = player_fpts.sum(axis=1)
311
+ else:
312
+ # Vectorized calculations with 1.5x multiplier for first player
313
+ first_player_salary = working_df.iloc[:, 2].map(salary_map).fillna(0) * 1.5
314
+ other_players_salary = working_df.iloc[:, 3:].apply(
315
+ lambda x: x.map(salary_map).fillna(0)
316
+ ).sum(axis=1)
317
+ working_df['salary'] = first_player_salary + other_players_salary
318
+
319
+ first_player_fpts = working_df.iloc[:, 2].map(actual_map).fillna(0) * 1.5
320
+ other_players_fpts = working_df.iloc[:, 3:].apply(
321
+ lambda x: x.map(actual_map).fillna(0)
322
+ ).sum(axis=1)
323
+ working_df['actual_fpts'] = first_player_fpts + other_players_fpts
324
+
325
+ # Vectorized actual_own calculation
326
+ player_ownership = working_df.apply(
327
+ lambda x: x.map(ownership_map).fillna(0)
328
+ )
329
+ working_df['actual_own'] = player_ownership.sum(axis=1)
330
+
331
+ # Vectorized duplication calculation
332
  working_df['sorted'] = working_df[st.session_state['player_columns']].apply(
333
+ lambda row: ','.join(sorted(row.values)), axis=1
 
334
  )
335
  working_df['dupes'] = working_df.groupby('sorted').transform('size')
336
+
337
+ # Vectorized unique calculations
338
+ working_df['uniques'] = working_df.groupby('BaseName')['dupes'].transform(
339
+ lambda x: (x == 1).sum()
340
+ )
341
+ working_df['under_5'] = working_df.groupby('BaseName')['dupes'].transform(
342
+ lambda x: (x <= 5).sum()
343
+ )
344
+ working_df['under_10'] = working_df.groupby('BaseName')['dupes'].transform(
345
+ lambda x: (x <= 10).sum()
346
+ )
347
+
 
348
  working_df = working_df.reset_index()
349
  working_df['percentile_finish'] = working_df['index'].rank(pct=True)
350
  working_df['finish'] = working_df['index']
351
  working_df = working_df.drop(['sorted', 'index'], axis=1)
352
+
353
+ # Store results
354
  st.session_state['field_player_frame'] = create_player_exposures(working_df, st.session_state['player_columns'])
355
  st.session_state['field_stack_frame'] = create_stack_exposures(working_df)
356
  st.session_state['display_contest_info'] = working_df.copy()
357
  st.session_state['contest_info_reset'] = working_df.copy()
358
  st.session_state['unique_players'] = pd.unique(st.session_state['display_contest_info'][st.session_state['player_columns']].values.ravel('K'))
359
+ st.session_state['unique_players'] = [p for p in st.session_state['unique_players'] if p != 'nan']
360
 
361
  if 'display_contest_info' in st.session_state:
362
  with st.expander("Info and filters"):
 
381
  st.session_state['remove_names'] = []
382
  st.session_state['display_contest_info'] = st.session_state['contest_info_reset'].copy()
383
  st.session_state['unique_players'] = pd.unique(st.session_state['display_contest_info'][st.session_state['player_columns']].values.ravel('K'))
384
+ st.session_state['unique_players'] = [p for p in st.session_state['unique_players'] if p != 'nan']
385
 
386
  with st.form(key='filter_form'):
387
  users_var, entries_var, stack_var, stack_size_var, player_var, remove_var = st.columns(6)