James McCool committed on
Commit
d04558f
·
1 Parent(s): 58cea02

Add functionality for player name cleaning and CSV mismatch detection

Browse files

- Updated `app.py` to remove the "Late Swap" tab and simplify the interface.
- Introduced new functions in `global_func`:
- `clean_player_name.py`: Cleans player names by removing unnecessary characters.
- `find_csv_mismatches.py`: Identifies mismatches between uploaded CSV files and projections.
- `find_name_mismatches.py`: Detects name mismatches in player portfolios.
- `highlight_rows.py`: Adds functionality to highlight changes in dataframes.
- `load_csv.py`, `load_file.py`, `load_ss_file.py`: Functions for loading CSV and Excel files with error handling.
- `optimize_lineup.py`: Optimizes player lineups based on various criteria.
- `predict_dupes.py`: Predicts duplicate players in lineups based on ownership and contest size.
- Enhanced user experience with improved error messages and data handling.

app.py CHANGED
@@ -20,7 +20,7 @@ freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '
20
  player_wrong_names_mlb = ['Enrique Hernandez']
21
  player_right_names_mlb = ['Kike Hernandez']
22
 
23
- tab1, tab2, tab3 = st.tabs(["Data Load", "Late Swap", "Manage Portfolio"])
24
  with tab1:
25
  if st.button('Clear data', key='reset1'):
26
  st.session_state.clear()
@@ -29,7 +29,7 @@ with tab1:
29
 
30
  with col1:
31
  st.subheader("Draftkings/Fanduel CSV")
32
- st.info("Upload the player pricing CSV from the site you are playing on. This is used in late swap exporting and/or with SaberSim portfolios, but is not necessary for the portfolio management functions.")
33
 
34
  upload_csv_col, csv_template_col = st.columns([3, 1])
35
  with upload_csv_col:
@@ -169,518 +169,6 @@ with tab1:
169
  st.session_state['export_dict'] = dict(zip(projections['player_names'], projections['upload_match']))
170
 
171
  with tab2:
172
- if st.button('Clear data', key='reset2'):
173
- st.session_state.clear()
174
-
175
- if 'portfolio' in st.session_state and 'projections_df' in st.session_state:
176
-
177
- optimized_df = None
178
-
179
- map_dict = {
180
- 'pos_map': dict(zip(st.session_state['projections_df']['player_names'],
181
- st.session_state['projections_df']['position'])),
182
- 'salary_map': dict(zip(st.session_state['projections_df']['player_names'],
183
- st.session_state['projections_df']['salary'])),
184
- 'proj_map': dict(zip(st.session_state['projections_df']['player_names'],
185
- st.session_state['projections_df']['median'])),
186
- 'own_map': dict(zip(st.session_state['projections_df']['player_names'],
187
- st.session_state['projections_df']['ownership'])),
188
- 'team_map': dict(zip(st.session_state['projections_df']['player_names'],
189
- st.session_state['projections_df']['team']))
190
- }
191
- # Calculate new stats for optimized lineups
192
- st.session_state['portfolio']['salary'] = st.session_state['portfolio'].apply(
193
- lambda row: sum(map_dict['salary_map'].get(player, 0) for player in row if player in map_dict['salary_map']), axis=1
194
- )
195
- st.session_state['portfolio']['median'] = st.session_state['portfolio'].apply(
196
- lambda row: sum(map_dict['proj_map'].get(player, 0) for player in row if player in map_dict['proj_map']), axis=1
197
- )
198
-
199
- st.session_state['portfolio']['Own'] = st.session_state['portfolio'].apply(
200
- lambda row: sum(map_dict['own_map'].get(player, 0) for player in row if player in map_dict['own_map']), axis=1
201
- )
202
-
203
- options_container = st.container()
204
- with options_container:
205
- col1, col2, col3, col4, col5, col6 = st.columns(6)
206
- with col1:
207
- curr_site_var = st.selectbox("Select your current site", options=['DraftKings', 'FanDuel'])
208
- with col2:
209
- curr_sport_var = st.selectbox("Select your current sport", options=['NBA', 'MLB', 'NFL', 'NHL', 'MMA'])
210
- with col3:
211
- swap_var = st.multiselect("Select late swap strategy", options=['Optimize', 'Increase volatility', 'Decrease volatility'])
212
- with col4:
213
- remove_teams_var = st.multiselect("What teams have already played?", options=st.session_state['projections_df']['team'].unique())
214
- with col5:
215
- winners_var = st.multiselect("Are there any players doing exceptionally well?", options=st.session_state['projections_df']['player_names'].unique(), max_selections=3)
216
- with col6:
217
- losers_var = st.multiselect("Are there any players doing exceptionally poorly?", options=st.session_state['projections_df']['player_names'].unique(), max_selections=3)
218
- if st.button('Clear Late Swap'):
219
- if 'optimized_df' in st.session_state:
220
- del st.session_state['optimized_df']
221
-
222
- map_dict = {
223
- 'pos_map': dict(zip(st.session_state['projections_df']['player_names'],
224
- st.session_state['projections_df']['position'])),
225
- 'salary_map': dict(zip(st.session_state['projections_df']['player_names'],
226
- st.session_state['projections_df']['salary'])),
227
- 'proj_map': dict(zip(st.session_state['projections_df']['player_names'],
228
- st.session_state['projections_df']['median'])),
229
- 'own_map': dict(zip(st.session_state['projections_df']['player_names'],
230
- st.session_state['projections_df']['ownership'])),
231
- 'team_map': dict(zip(st.session_state['projections_df']['player_names'],
232
- st.session_state['projections_df']['team']))
233
- }
234
- # Calculate new stats for optimized lineups
235
- st.session_state['portfolio']['salary'] = st.session_state['portfolio'].apply(
236
- lambda row: sum(map_dict['salary_map'].get(player, 0) for player in row if player in map_dict['salary_map']), axis=1
237
- )
238
- st.session_state['portfolio']['median'] = st.session_state['portfolio'].apply(
239
- lambda row: sum(map_dict['proj_map'].get(player, 0) for player in row if player in map_dict['proj_map']), axis=1
240
- )
241
- st.session_state['portfolio']['Own'] = st.session_state['portfolio'].apply(
242
- lambda row: sum(map_dict['own_map'].get(player, 0) for player in row if player in map_dict['own_map']), axis=1
243
- )
244
-
245
- if st.button('Run Late Swap'):
246
- st.session_state['portfolio'] = st.session_state['portfolio'].drop(columns=['salary', 'median', 'Own'])
247
- if curr_sport_var == 'NBA':
248
- if curr_site_var == 'DraftKings':
249
- st.session_state['portfolio'] = st.session_state['portfolio'].set_axis(['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'UTIL'], axis=1)
250
- else:
251
- st.session_state['portfolio'] = st.session_state['portfolio'].set_axis(['PG', 'PG', 'SG', 'SG', 'SF', 'SF', 'PF', 'PF', 'C'], axis=1)
252
-
253
- # Define roster position rules
254
- if curr_site_var == 'DraftKings':
255
- position_rules = {
256
- 'PG': ['PG'],
257
- 'SG': ['SG'],
258
- 'SF': ['SF'],
259
- 'PF': ['PF'],
260
- 'C': ['C'],
261
- 'G': ['PG', 'SG'],
262
- 'F': ['SF', 'PF'],
263
- 'UTIL': ['PG', 'SG', 'SF', 'PF', 'C']
264
- }
265
- else:
266
- position_rules = {
267
- 'PG': ['PG'],
268
- 'SG': ['SG'],
269
- 'SF': ['SF'],
270
- 'PF': ['PF'],
271
- 'C': ['C'],
272
- }
273
- # Create position groups from projections data
274
- position_groups = {}
275
- for _, player in st.session_state['projections_df'].iterrows():
276
- positions = player['position'].split('/')
277
- for pos in positions:
278
- if pos not in position_groups:
279
- position_groups[pos] = []
280
- position_groups[pos].append({
281
- 'player_names': player['player_names'],
282
- 'salary': player['salary'],
283
- 'median': player['median'],
284
- 'ownership': player['ownership'],
285
- 'positions': positions # Store all eligible positions
286
- })
287
-
288
- def optimize_lineup(row):
289
- current_lineup = []
290
- total_salary = 0
291
- if curr_site_var == 'DraftKings':
292
- salary_cap = 50000
293
- else:
294
- salary_cap = 60000
295
- used_players = set()
296
-
297
- # Convert row to dictionary with roster positions
298
- roster = {}
299
- for col, player in zip(row.index, row):
300
- if col not in ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Lineup Edge']:
301
- roster[col] = {
302
- 'name': player,
303
- 'position': map_dict['pos_map'].get(player, '').split('/'),
304
- 'team': map_dict['team_map'].get(player, ''),
305
- 'salary': map_dict['salary_map'].get(player, 0),
306
- 'median': map_dict['proj_map'].get(player, 0),
307
- 'ownership': map_dict['own_map'].get(player, 0)
308
- }
309
- total_salary += roster[col]['salary']
310
- used_players.add(player)
311
-
312
- # Optimize each roster position in random order
313
- roster_positions = list(roster.items())
314
- random.shuffle(roster_positions)
315
-
316
- for roster_pos, current in roster_positions:
317
- # Skip optimization for players from removed teams
318
- if current['team'] in remove_teams_var:
319
- continue
320
-
321
- valid_positions = position_rules[roster_pos]
322
- better_options = []
323
-
324
- # Find valid replacements for this roster position
325
- for pos in valid_positions:
326
- if pos in position_groups:
327
- pos_options = [
328
- p for p in position_groups[pos]
329
- if p['median'] > current['median']
330
- and (total_salary - current['salary'] + p['salary']) <= salary_cap
331
- and p['player_names'] not in used_players
332
- and any(valid_pos in p['positions'] for valid_pos in valid_positions)
333
- and map_dict['team_map'].get(p['player_names']) not in remove_teams_var # Check team restriction
334
- ]
335
- better_options.extend(pos_options)
336
-
337
- if better_options:
338
- # Remove duplicates
339
- better_options = {opt['player_names']: opt for opt in better_options}.values()
340
-
341
- # Sort by median projection and take the best one
342
- best_replacement = max(better_options, key=lambda x: x['median'])
343
-
344
- # Update the lineup and tracking variables
345
- used_players.remove(current['name'])
346
- used_players.add(best_replacement['player_names'])
347
- total_salary = total_salary - current['salary'] + best_replacement['salary']
348
- roster[roster_pos] = {
349
- 'name': best_replacement['player_names'],
350
- 'position': map_dict['pos_map'][best_replacement['player_names']].split('/'),
351
- 'team': map_dict['team_map'][best_replacement['player_names']],
352
- 'salary': best_replacement['salary'],
353
- 'median': best_replacement['median'],
354
- 'ownership': best_replacement['ownership']
355
- }
356
-
357
- # Return optimized lineup maintaining original column order
358
- return [roster[pos]['name'] for pos in row.index if pos in roster]
359
-
360
- def optimize_lineup_winners(row):
361
- current_lineup = []
362
- total_salary = 0
363
- if curr_site_var == 'DraftKings':
364
- salary_cap = 50000
365
- else:
366
- salary_cap = 60000
367
- used_players = set()
368
-
369
- # Check if any winners are in the lineup and count them
370
- winners_in_lineup = sum(1 for player in row if player in winners_var)
371
- changes_needed = min(winners_in_lineup, 3) if winners_in_lineup > 0 else 0
372
- changes_made = 0
373
-
374
- # Convert row to dictionary with roster positions
375
- roster = {}
376
- for col, player in zip(row.index, row):
377
- if col not in ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Lineup Edge']:
378
- roster[col] = {
379
- 'name': player,
380
- 'position': map_dict['pos_map'].get(player, '').split('/'),
381
- 'team': map_dict['team_map'].get(player, ''),
382
- 'salary': map_dict['salary_map'].get(player, 0),
383
- 'median': map_dict['proj_map'].get(player, 0),
384
- 'ownership': map_dict['own_map'].get(player, 0)
385
- }
386
- total_salary += roster[col]['salary']
387
- used_players.add(player)
388
-
389
- # Only proceed with ownership-based optimization if we have winners in the lineup
390
- if changes_needed > 0:
391
- # Randomize the order of positions to optimize
392
- roster_positions = list(roster.items())
393
- random.shuffle(roster_positions)
394
-
395
- for roster_pos, current in roster_positions:
396
- # Stop if we've made enough changes
397
- if changes_made >= changes_needed:
398
- break
399
-
400
- # Skip optimization for players from removed teams or if the current player is a winner
401
- if current['team'] in remove_teams_var or current['name'] in winners_var:
402
- continue
403
-
404
- valid_positions = list(position_rules[roster_pos])
405
- random.shuffle(valid_positions)
406
- better_options = []
407
-
408
- # Find valid replacements with higher ownership
409
- for pos in valid_positions:
410
- if pos in position_groups:
411
- pos_options = [
412
- p for p in position_groups[pos]
413
- if p['ownership'] > current['ownership']
414
- and p['median'] >= current['median'] - 3
415
- and (total_salary - current['salary'] + p['salary']) <= salary_cap
416
- and (total_salary - current['salary'] + p['salary']) >= salary_cap - 1000
417
- and p['player_names'] not in used_players
418
- and any(valid_pos in p['positions'] for valid_pos in valid_positions)
419
- and map_dict['team_map'].get(p['player_names']) not in remove_teams_var
420
- ]
421
- better_options.extend(pos_options)
422
-
423
- if better_options:
424
- # Remove duplicates
425
- better_options = {opt['player_names']: opt for opt in better_options}.values()
426
-
427
- # Sort by ownership and take the highest owned option
428
- best_replacement = max(better_options, key=lambda x: x['ownership'])
429
-
430
- # Update the lineup and tracking variables
431
- used_players.remove(current['name'])
432
- used_players.add(best_replacement['player_names'])
433
- total_salary = total_salary - current['salary'] + best_replacement['salary']
434
- roster[roster_pos] = {
435
- 'name': best_replacement['player_names'],
436
- 'position': map_dict['pos_map'][best_replacement['player_names']].split('/'),
437
- 'team': map_dict['team_map'][best_replacement['player_names']],
438
- 'salary': best_replacement['salary'],
439
- 'median': best_replacement['median'],
440
- 'ownership': best_replacement['ownership']
441
- }
442
- changes_made += 1
443
-
444
- # Return optimized lineup maintaining original column order
445
- return [roster[pos]['name'] for pos in row.index if pos in roster]
446
-
447
- def optimize_lineup_losers(row):
448
- current_lineup = []
449
- total_salary = 0
450
- if curr_site_var == 'DraftKings':
451
- salary_cap = 50000
452
- else:
453
- salary_cap = 60000
454
- used_players = set()
455
-
456
- # Check if any winners are in the lineup and count them
457
- losers_in_lineup = sum(1 for player in row if player in losers_var)
458
- changes_needed = min(losers_in_lineup, 3) if losers_in_lineup > 0 else 0
459
- changes_made = 0
460
-
461
- # Convert row to dictionary with roster positions
462
- roster = {}
463
- for col, player in zip(row.index, row):
464
- if col not in ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Lineup Edge']:
465
- roster[col] = {
466
- 'name': player,
467
- 'position': map_dict['pos_map'].get(player, '').split('/'),
468
- 'team': map_dict['team_map'].get(player, ''),
469
- 'salary': map_dict['salary_map'].get(player, 0),
470
- 'median': map_dict['proj_map'].get(player, 0),
471
- 'ownership': map_dict['own_map'].get(player, 0)
472
- }
473
- total_salary += roster[col]['salary']
474
- used_players.add(player)
475
-
476
- # Only proceed with ownership-based optimization if we have winners in the lineup
477
- if changes_needed > 0:
478
- # Randomize the order of positions to optimize
479
- roster_positions = list(roster.items())
480
- random.shuffle(roster_positions)
481
-
482
- for roster_pos, current in roster_positions:
483
- # Stop if we've made enough changes
484
- if changes_made >= changes_needed:
485
- break
486
-
487
- # Skip optimization for players from removed teams or if the current player is a winner
488
- if current['team'] in remove_teams_var or current['name'] in losers_var:
489
- continue
490
-
491
- valid_positions = list(position_rules[roster_pos])
492
- random.shuffle(valid_positions)
493
- better_options = []
494
-
495
- # Find valid replacements with higher ownership
496
- for pos in valid_positions:
497
- if pos in position_groups:
498
- pos_options = [
499
- p for p in position_groups[pos]
500
- if p['ownership'] < current['ownership']
501
- and p['median'] >= current['median'] - 3
502
- and (total_salary - current['salary'] + p['salary']) <= salary_cap
503
- and (total_salary - current['salary'] + p['salary']) >= salary_cap - 1000
504
- and p['player_names'] not in used_players
505
- and any(valid_pos in p['positions'] for valid_pos in valid_positions)
506
- and map_dict['team_map'].get(p['player_names']) not in remove_teams_var
507
- ]
508
- better_options.extend(pos_options)
509
-
510
- if better_options:
511
- # Remove duplicates
512
- better_options = {opt['player_names']: opt for opt in better_options}.values()
513
-
514
- # Sort by ownership and take the highest owned option
515
- best_replacement = max(better_options, key=lambda x: x['ownership'])
516
-
517
- # Update the lineup and tracking variables
518
- used_players.remove(current['name'])
519
- used_players.add(best_replacement['player_names'])
520
- total_salary = total_salary - current['salary'] + best_replacement['salary']
521
- roster[roster_pos] = {
522
- 'name': best_replacement['player_names'],
523
- 'position': map_dict['pos_map'][best_replacement['player_names']].split('/'),
524
- 'team': map_dict['team_map'][best_replacement['player_names']],
525
- 'salary': best_replacement['salary'],
526
- 'median': best_replacement['median'],
527
- 'ownership': best_replacement['ownership']
528
- }
529
- changes_made += 1
530
-
531
- # Return optimized lineup maintaining original column order
532
- return [roster[pos]['name'] for pos in row.index if pos in roster]
533
-
534
- # Create a progress bar
535
- progress_bar = st.progress(0)
536
- status_text = st.empty()
537
-
538
- # Process each lineup
539
- optimized_lineups = []
540
- total_lineups = len(st.session_state['portfolio'])
541
-
542
- for idx, row in st.session_state['portfolio'].iterrows():
543
- # First optimization pass
544
- first_pass = optimize_lineup(row)
545
- first_pass_series = pd.Series(first_pass, index=row.index)
546
-
547
- second_pass = optimize_lineup(first_pass_series)
548
- second_pass_series = pd.Series(second_pass, index=row.index)
549
-
550
- third_pass = optimize_lineup(second_pass_series)
551
- third_pass_series = pd.Series(third_pass, index=row.index)
552
-
553
- fourth_pass = optimize_lineup(third_pass_series)
554
- fourth_pass_series = pd.Series(fourth_pass, index=row.index)
555
-
556
- fifth_pass = optimize_lineup(fourth_pass_series)
557
- fifth_pass_series = pd.Series(fifth_pass, index=row.index)
558
-
559
- # Second optimization pass
560
- final_lineup = optimize_lineup(fifth_pass_series)
561
- optimized_lineups.append(final_lineup)
562
-
563
- if 'Optimize' in swap_var:
564
- progress = (idx + 1) / total_lineups
565
- progress_bar.progress(progress)
566
- status_text.text(f'Optimizing Lineups {idx + 1} of {total_lineups}')
567
- else:
568
- pass
569
-
570
- # Create new dataframe with optimized lineups
571
- if 'Optimize' in swap_var:
572
- st.session_state['optimized_df_medians'] = pd.DataFrame(optimized_lineups, columns=st.session_state['portfolio'].columns)
573
- else:
574
- st.session_state['optimized_df_medians'] = st.session_state['portfolio']
575
-
576
- # Create a progress bar
577
- progress_bar_winners = st.progress(0)
578
- status_text_winners = st.empty()
579
-
580
- # Process each lineup
581
- optimized_lineups_winners = []
582
- total_lineups = len(st.session_state['optimized_df_medians'])
583
-
584
- for idx, row in st.session_state['optimized_df_medians'].iterrows():
585
-
586
- final_lineup = optimize_lineup_winners(row)
587
- optimized_lineups_winners.append(final_lineup)
588
-
589
- if 'Decrease volatility' in swap_var:
590
- progress_winners = (idx + 1) / total_lineups
591
- progress_bar_winners.progress(progress_winners)
592
- status_text_winners.text(f'Lowering Volatility around Winners {idx + 1} of {total_lineups}')
593
- else:
594
- pass
595
-
596
- # Create new dataframe with optimized lineups
597
- if 'Decrease volatility' in swap_var:
598
- st.session_state['optimized_df_winners'] = pd.DataFrame(optimized_lineups_winners, columns=st.session_state['optimized_df_medians'].columns)
599
- else:
600
- st.session_state['optimized_df_winners'] = st.session_state['optimized_df_medians']
601
-
602
- # Create a progress bar
603
- progress_bar_losers = st.progress(0)
604
- status_text_losers = st.empty()
605
-
606
- # Process each lineup
607
- optimized_lineups_losers = []
608
- total_lineups = len(st.session_state['optimized_df_winners'])
609
-
610
- for idx, row in st.session_state['optimized_df_winners'].iterrows():
611
-
612
- final_lineup = optimize_lineup_losers(row)
613
- optimized_lineups_losers.append(final_lineup)
614
-
615
- if 'Increase volatility' in swap_var:
616
- progress_losers = (idx + 1) / total_lineups
617
- progress_bar_losers.progress(progress_losers)
618
- status_text_losers.text(f'Increasing Volatility around Losers {idx + 1} of {total_lineups}')
619
- else:
620
- pass
621
-
622
- # Create new dataframe with optimized lineups
623
- if 'Increase volatility' in swap_var:
624
- st.session_state['optimized_df'] = pd.DataFrame(optimized_lineups_losers, columns=st.session_state['optimized_df_winners'].columns)
625
- else:
626
- st.session_state['optimized_df'] = st.session_state['optimized_df_winners']
627
-
628
- # Calculate new stats for optimized lineups
629
- st.session_state['optimized_df']['salary'] = st.session_state['optimized_df'].apply(
630
- lambda row: sum(map_dict['salary_map'].get(player, 0) for player in row if player in map_dict['salary_map']), axis=1
631
- )
632
- st.session_state['optimized_df']['median'] = st.session_state['optimized_df'].apply(
633
- lambda row: sum(map_dict['proj_map'].get(player, 0) for player in row if player in map_dict['proj_map']), axis=1
634
- )
635
- st.session_state['optimized_df']['Own'] = st.session_state['optimized_df'].apply(
636
- lambda row: sum(map_dict['own_map'].get(player, 0) for player in row if player in map_dict['own_map']), axis=1
637
- )
638
-
639
- # Display results
640
- st.success('Optimization complete!')
641
-
642
- if 'optimized_df' in st.session_state:
643
- st.write("Increase in median highlighted in yellow, descrease in volatility highlighted in blue, increase in volatility highlighted in red:")
644
- st.dataframe(
645
- st.session_state['optimized_df'].style
646
- .apply(highlight_changes, axis=1)
647
- .apply(highlight_changes_winners, axis=1)
648
- .apply(highlight_changes_losers, axis=1)
649
- .background_gradient(axis=0)
650
- .background_gradient(cmap='RdYlGn')
651
- .format(precision=2),
652
- height=1000,
653
- use_container_width=True
654
- )
655
-
656
- # Option to download optimized lineups
657
- if st.button('Prepare Late Swap Export'):
658
- export_df = st.session_state['optimized_df'].copy()
659
-
660
- # Map player names to their export IDs for all player columns
661
- for col in export_df.columns:
662
- if col not in ['salary', 'median', 'Own']:
663
- export_df[col] = export_df[col].map(st.session_state['export_dict'])
664
-
665
- csv = export_df.to_csv(index=False)
666
- st.download_button(
667
- label="Download CSV",
668
- data=csv,
669
- file_name="optimized_lineups.csv",
670
- mime="text/csv"
671
- )
672
- else:
673
- st.write("Current Portfolio")
674
- st.dataframe(
675
- st.session_state['portfolio'].style
676
- .background_gradient(axis=0)
677
- .background_gradient(cmap='RdYlGn')
678
- .format(precision=2),
679
- height=1000,
680
- use_container_width=True
681
- )
682
-
683
- with tab3:
684
  if st.button('Clear data', key='reset3'):
685
  st.session_state.clear()
686
  if 'portfolio' in st.session_state and 'projections_df' in st.session_state:
 
20
  player_wrong_names_mlb = ['Enrique Hernandez']
21
  player_right_names_mlb = ['Kike Hernandez']
22
 
23
+ tab1, tab2 = st.tabs(["Data Load", "Contest Analysis"])
24
  with tab1:
25
  if st.button('Clear data', key='reset1'):
26
  st.session_state.clear()
 
29
 
30
  with col1:
31
  st.subheader("Draftkings/Fanduel CSV")
32
+ st.info("Upload the player pricing CSV from the site you are playing on.")
33
 
34
  upload_csv_col, csv_template_col = st.columns([3, 1])
35
  with upload_csv_col:
 
169
  st.session_state['export_dict'] = dict(zip(projections['player_names'], projections['upload_match']))
170
 
171
  with tab2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  if st.button('Clear data', key='reset3'):
173
  st.session_state.clear()
174
  if 'portfolio' in st.session_state and 'projections_df' in st.session_state:
global_func/clean_player_name.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ from fuzzywuzzy import process
6
+
7
def clean_player_name(name):
    """Strip site-specific decorations from a raw player name.

    Keeps only the second colon-delimited segment when a colon is
    present (e.g. ``"P: John Doe"`` -> ``"John Doe"``), then drops any
    trailing parenthesized suffix (e.g. ``"John Doe (12345)"`` ->
    ``"John Doe"``).
    """
    # A colon marks a leading label; keep the segment that follows it.
    colon_parts = name.split(':')
    if len(colon_parts) > 1:
        name = colon_parts[1].strip()

    # Everything from an opening parenthesis onward is an annotation.
    paren_index = name.find('(')
    if paren_index != -1:
        name = name[:paren_index].strip()

    return name
global_func/find_csv_mismatches.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ from fuzzywuzzy import process
5
+
6
def find_csv_mismatches(csv_df, projections_df):
    """Reconcile player names in an uploaded site CSV with the projections file.

    CSV names with a perfect (100%) fuzzy match against a projection name are
    renamed automatically; the remainder are resolved one at a time through
    Streamlit radio/button widgets, with progress tracked in
    ``st.session_state`` across reruns.

    Returns a copy of ``csv_df`` with any confirmed renames applied.
    """
    # Create copies of the dataframes to avoid modifying the originals
    csv_df = csv_df.copy()
    projections_df = projections_df.copy()

    if 'Name' not in csv_df.columns:
        st.error("No 'Name' column found in CSV file")
        return csv_df

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return csv_df

    # Get unique player names from CSV and projections
    csv_players = set(csv_df['Name'].dropna().unique())
    projection_players = set(projections_df['player_names'].unique())
    # BUGFIX: fuzzy-match candidates must come from the projections, not from
    # the CSV itself (this list was previously built from csv_players).
    projection_players_list = list(projection_players)

    # Find players in CSV that are missing from projections.
    # BUGFIX: previously computed as projection_players - csv_players, so the
    # renames below (keyed on csv_df['Name'] == player) could never match a row.
    players_missing_from_projections = list(csv_players - projection_players)

    # Automatically handle 100% matches before starting interactive process
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue
        closest_matches = process.extract(player, projection_players_list, limit=1)
        if closest_matches[0][1] == 100:  # If perfect match found
            match_name = closest_matches[0][0]
            # Update CSV DataFrame to use the projection name
            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
        else:
            players_to_process.append(player)

    # Initialize session state for tracking current player if not exists
    if 'csv_current_player_index' not in st.session_state:
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = players_to_process

    # Display results
    if players_missing_from_projections:
        st.warning("Players in CSV but missing from projections")

        # Display remaining players
        remaining_players = st.session_state.csv_players_to_process[st.session_state.csv_current_player_index:]
        st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
                "\n".join(f"- {player}" for player in remaining_players))

        if st.session_state.csv_current_player_index < len(st.session_state.csv_players_to_process):
            current_player = st.session_state.csv_players_to_process[st.session_state.csv_current_player_index]

            # Find the top 3 closest matches
            closest_matches = process.extract(current_player, projection_players_list, limit=3)

            st.write(f"**Missing Player {st.session_state.csv_current_player_index + 1} of {len(st.session_state.csv_players_to_process)}:** {current_player}")

            # Create radio buttons for selection
            options = [f"{match[0]} ({match[1]}%)" for match in closest_matches]
            options.append("None of these")

            selected_option = st.radio(
                f"Select correct match:",
                options,
                key=f"csv_radio_{current_player}"
            )

            if st.button("Confirm Selection", key="csv_confirm"):
                if selected_option != "None of these":
                    # Strip the " (score%)" suffix the radio label carries.
                    selected_name = selected_option.split(" (")[0]
                    # Update CSV DataFrame
                    csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
                    st.success(f"Replaced '{current_player}' with '{selected_name}'")
                    st.session_state['csv_file'] = csv_df

                # Move to next player
                st.session_state.csv_current_player_index += 1
                st.rerun()
        else:
            st.success("All players have been processed!")
            # Reset the index for future runs
            st.session_state.csv_current_player_index = 0
            st.session_state.csv_players_to_process = []
    else:
        st.success("All CSV players found in projections!")

    return csv_df
global_func/find_name_mismatches.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ from fuzzywuzzy import process
6
+
7
def find_name_mismatches(portfolio_df, projections_df):
    """Reconcile projection player names against the names used in a portfolio.

    Projection names with a perfect (100%) fuzzy match to a portfolio name are
    renamed automatically to the portfolio spelling; the rest are resolved one
    at a time through Streamlit radio/button widgets, with progress tracked in
    ``st.session_state`` across reruns.

    Returns a copy of ``projections_df`` with any confirmed renames applied.
    """
    # Create a copy of the projections dataframe to avoid modifying the original
    projections_df = projections_df.copy()

    # NOTE(review): this renumbers the caller's portfolio_df columns in place
    # (no copy is taken) -- confirm callers do not rely on the original labels.
    col_count = len(portfolio_df.columns)
    portfolio_df.columns = range(col_count)

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return projections_df

    # Get unique player names from portfolio and projections
    portfolio_players = set()
    for col in portfolio_df.columns:
        portfolio_players.update(portfolio_df[col].unique())
    projection_players = set(projections_df['player_names'].unique())
    projection_players_list = list(projection_players)

    # Find players in portfolio that are missing from projections
    players_missing_from_projections = list(portfolio_players - projection_players)

    # Automatically handle 100% matches before starting interactive process
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            # Portfolio cells can hold NaN/other non-string values; skip them.
            st.warning(f"Skipping non-string value: {player}")
            continue
        closest_matches = process.extract(player, projection_players_list, limit=1)
        if closest_matches[0][1] == 100:  # If perfect match found
            match_name = closest_matches[0][0]
            # Rename the projection entry to the portfolio's spelling.
            projections_df.loc[projections_df['player_names'] == match_name, 'player_names'] = player
            st.success(f"Automatically matched '{match_name}' with '{player}' (100% match)")
        else:
            players_to_process.append(player)

    # Initialize session state for tracking current player if not exists
    if 'current_player_index' not in st.session_state:
        st.session_state.current_player_index = 0
        st.session_state.players_to_process = players_to_process

    # Display results
    if players_missing_from_projections:
        st.warning("Players in portfolio but missing from projections")

        # Display remaining players
        remaining_players = st.session_state.players_to_process[st.session_state.current_player_index:]
        st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
                "\n".join(f"- {player}" for player in remaining_players))

        if st.session_state.current_player_index < len(st.session_state.players_to_process):
            current_player = st.session_state.players_to_process[st.session_state.current_player_index]

            # Find the top 3 closest matches
            closest_matches = process.extract(current_player, projection_players_list, limit=3)

            st.write(f"**Missing Player {st.session_state.current_player_index + 1} of {len(st.session_state.players_to_process)}:** {current_player}")

            # Create radio buttons for selection
            options = [f"{match[0]} ({match[1]}%)" for match in closest_matches]
            options.append("None of these")

            selected_option = st.radio(
                f"Select correct match:",
                options,
                key=f"radio_{current_player}"
            )

            if st.button("Confirm Selection"):
                if selected_option != "None of these":
                    # Strip the " (score%)" suffix the radio label carries.
                    selected_name = selected_option.split(" (")[0]
                    projections_df.loc[projections_df['player_names'] == selected_name, 'player_names'] = current_player
                    st.success(f"Replaced '{selected_name}' with '{current_player}'")
                    st.session_state['projections_df'] = projections_df

                # Move to next player
                st.session_state.current_player_index += 1
                st.rerun()
        else:
            st.success("All players have been processed!")
            # Reset the index for future runs
            st.session_state.current_player_index = 0
            st.session_state.players_to_process = []
    else:
        st.success("All portfolio players found in projections!")

    return projections_df
global_func/highlight_rows.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ from fuzzywuzzy import process
6
+
7
def highlight_changes(row):
    """Styler callback: yellow-highlight cells that differ from the saved portfolio.

    Compares this row against the row at the same positional index in
    st.session_state['portfolio'] and returns one CSS string per cell.
    """
    baseline = st.session_state['portfolio'].iloc[row.name]
    styles = [''] * len(row)
    for idx, pair in enumerate(zip(baseline, row)):
        if pair[0] != pair[1]:
            styles[idx] = 'background-color: yellow'
    return styles
14
+
15
def highlight_changes_winners(row):
    """Styler callback: aqua-highlight cells that differ from the medians-optimized frame.

    Compares this row against the row at the same positional index in
    st.session_state['optimized_df_medians'] and returns one CSS string per cell.
    """
    baseline = st.session_state['optimized_df_medians'].iloc[row.name]
    styles = [''] * len(row)
    for idx, pair in enumerate(zip(baseline, row)):
        if pair[0] != pair[1]:
            styles[idx] = 'background-color: aqua'
    return styles
22
+
23
def highlight_changes_losers(row):
    """Styler callback: darksalmon-highlight cells that differ from the winners frame.

    Compares this row against the row at the same positional index in
    st.session_state['optimized_df_winners'] and returns one CSS string per cell.
    """
    baseline = st.session_state['optimized_df_winners'].iloc[row.name]
    styles = [''] * len(row)
    for idx, pair in enumerate(zip(baseline, row)):
        if pair[0] != pair[1]:
            styles[idx] = 'background-color: darksalmon'
    return styles
global_func/load_csv.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ from fuzzywuzzy import process
6
+
7
def load_csv(upload):
    """Load a DraftKings/Fanduel player-pricing CSV from a Streamlit upload.

    When both 'Name' and 'ID' columns exist, a convenience 'Name + ID'
    column is added in the DraftKings "Name (ID)" export format so later
    joins/exports can use it directly.

    Parameters
    ----------
    upload : streamlit UploadedFile or None
        The uploaded file object; only .csv files are accepted here.

    Returns
    -------
    pandas.DataFrame or None
        The parsed pricing table, or None when nothing usable was uploaded.
    """
    if upload is not None:
        try:
            if upload.name.endswith('.csv'):
                df = pd.read_csv(upload)
                try:
                    df['Name + ID'] = df['Name'] + ' (' + df['ID'].astype(str) + ')'
                except (KeyError, TypeError):
                    # Fanduel exports lack Name/ID (or Name is non-string);
                    # the combined key is optional, so skip it quietly.
                    pass
            else:
                # Only CSV is supported by this loader (load_file handles Excel).
                st.error('Please upload a CSV file')
                return None

            return df
        except Exception as e:
            st.error(f'Error loading file: {str(e)}')
            return None
    return None
global_func/load_file.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ from fuzzywuzzy import process
6
+
7
+ ## import global functions
8
+ from global_func.clean_player_name import clean_player_name
9
+
10
def load_file(upload):
    """Load a portfolio file (CSV or Excel) and normalize player-name strings.

    Returns both the untouched upload (for later export) and a cleaned copy
    whose string cells have been passed through clean_player_name.

    Parameters
    ----------
    upload : streamlit UploadedFile or None
        The uploaded file; .csv, .xls and .xlsx are accepted.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame) or (None, None)
        (export_df, cleaned_df). On any failure (None, None) is returned so
        callers can always unpack two values. (The original returned a bare
        None on failure, which breaks ``export_df, df = load_file(u)``.)
    """
    if upload is not None:
        try:
            if upload.name.endswith('.csv'):
                df = pd.read_csv(upload)
            elif upload.name.endswith(('.xls', '.xlsx')):
                df = pd.read_excel(upload)
            else:
                st.error('Please upload either a CSV or Excel file')
                return None, None

            # Keep a pristine copy for export before names are altered.
            export_df = df.copy()

            # Only object (string-ish) columns can contain player names;
            # numeric columns are left untouched.
            for col in df.columns:
                if df[col].dtype == 'object':
                    df[col] = df[col].apply(lambda x: clean_player_name(x) if isinstance(x, str) else x)

            return export_df, df
        except Exception as e:
            st.error(f'Error loading file: {str(e)}')
            return None, None
    return None, None
global_func/load_ss_file.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ from fuzzywuzzy import process
6
+
7
def load_ss_file(lineups, csv_file):
    """Load a SaberSim lineups file and translate site player IDs to names.

    Parameters
    ----------
    lineups : streamlit UploadedFile
        Lineups file (.csv/.xls/.xlsx) whose cells hold site player IDs.
    csv_file : pandas.DataFrame
        The site pricing table supplying the ID -> name mapping.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame) or (None, None)
        (raw ID-based lineups for export, lineups with IDs mapped to names);
        (None, None) on any failure.
    """
    df = csv_file.copy()
    # DraftKings exports use ID/Name; Fanduel uses Id/Nickname. Only a
    # missing-column KeyError should trigger the Fanduel fallback.
    try:
        name_dict = dict(zip(df['ID'], df['Name']))
    except KeyError:
        name_dict = dict(zip(df['Id'], df['Nickname']))

    # Now load and process the lineups file
    try:
        if lineups.name.endswith('.csv'):
            lineups_df = pd.read_csv(lineups)
        elif lineups.name.endswith(('.xls', '.xlsx')):
            lineups_df = pd.read_excel(lineups)
        else:
            st.error('Please upload either a CSV or Excel file for lineups')
            return None, None

        # Preserve the ID-based original for later export.
        export_df = lineups_df.copy()

        # Map the IDs to names. IDs absent from the pricing table become
        # NaN, which downstream name-matching surfaces as mismatches.
        for col in lineups_df.columns:
            lineups_df[col] = lineups_df[col].map(name_dict)

        return export_df, lineups_df

    except Exception as e:
        st.error(f'Error loading lineups file: {str(e)}')
        return None, None
global_func/optimize_lineup.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ from fuzzywuzzy import process
6
+
7
def optimize_lineup(row):
    """Greedily upgrade one lineup row, position by position, under a $50k cap.

    Each roster slot is considered in random order; a slot's player is
    replaced by the highest-median eligible player that fits the remaining
    salary, is not already in the lineup, and is not on a removed team.

    NOTE(review): this function reads several names that are not defined in
    this module as shown — `map_dict`, `remove_teams_var`, `position_rules`,
    `position_groups`, and the `random` module (not imported here). It
    presumably runs with those injected as globals by the calling app page;
    confirm before reusing it standalone.

    Parameters
    ----------
    row : pandas.Series
        One portfolio row: player names indexed by roster-position columns,
        plus metric columns (salary/median/Own/...) that are skipped below.

    Returns
    -------
    list[str]
        The (possibly upgraded) player names, in the row's original
        roster-column order.
    """
    current_lineup = []  # NOTE(review): never used; candidate for removal
    total_salary = 0
    salary_cap = 50000  # DraftKings classic cap; hard-coded here
    used_players = set()

    # Convert row to dictionary with roster positions
    roster = {}
    for col, player in zip(row.index, row):
        # Metric columns are not roster slots; skip them.
        if col not in ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Lineup Edge']:
            roster[col] = {
                'name': player,
                # pos_map values look like 'SS/2B'; split into eligible positions
                'position': map_dict['pos_map'].get(player, '').split('/'),
                'team': map_dict['team_map'].get(player, ''),
                'salary': map_dict['salary_map'].get(player, 0),
                'median': map_dict['proj_map'].get(player, 0),
                'ownership': map_dict['own_map'].get(player, 0)
            }
            total_salary += roster[col]['salary']
            used_players.add(player)

    # Optimize each roster position in random order
    # (randomizing slot order varies which slot gets first claim on salary)
    roster_positions = list(roster.items())
    random.shuffle(roster_positions)

    for roster_pos, current in roster_positions:
        # Skip optimization for players from removed teams
        if current['team'] in remove_teams_var:
            continue

        valid_positions = position_rules[roster_pos]
        better_options = []

        # Find valid replacements for this roster position
        for pos in valid_positions:
            if pos in position_groups:
                # Candidates must project higher, fit the cap after the swap,
                # not already be used, be slot-eligible, and not be on a
                # removed team. NOTE(review): candidates appear to be dicts
                # with 'player_names'/'positions' keys — confirm the
                # position_groups schema at the call site.
                pos_options = [
                    p for p in position_groups[pos]
                    if p['median'] > current['median']
                    and (total_salary - current['salary'] + p['salary']) <= salary_cap
                    and p['player_names'] not in used_players
                    and any(valid_pos in p['positions'] for valid_pos in valid_positions)
                    and map_dict['team_map'].get(p['player_names']) not in remove_teams_var # Check team restriction
                ]
                better_options.extend(pos_options)

        if better_options:
            # Remove duplicates
            better_options = {opt['player_names']: opt for opt in better_options}.values()

            # Sort by median projection and take the best one
            best_replacement = max(better_options, key=lambda x: x['median'])

            # Update the lineup and tracking variables
            used_players.remove(current['name'])
            used_players.add(best_replacement['player_names'])
            total_salary = total_salary - current['salary'] + best_replacement['salary']
            roster[roster_pos] = {
                'name': best_replacement['player_names'],
                'position': map_dict['pos_map'][best_replacement['player_names']].split('/'),
                'team': map_dict['team_map'][best_replacement['player_names']],
                'salary': best_replacement['salary'],
                'median': best_replacement['median'],
                'ownership': best_replacement['ownership']
            }

    # Return optimized lineup maintaining original column order
    return [roster[pos]['name'] for pos in row.index if pos in roster]
global_func/predict_dupes.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ from fuzzywuzzy import process
6
+
7
def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var):
    """Add duplicate-count and contest-equity columns to a lineup portfolio.

    For each lineup row, estimates expected duplicates ('Dupes'), a finish
    percentile ('Finish_percentile'), win probability ('Win%'), and a
    composite 'Lineup Edge', driven by player ownership maps and contest
    size. Branches cover Fanduel vs Draftkings and Showdown vs Classic
    roster shapes; intermediate helper columns are dropped before returning.

    Parameters
    ----------
    portfolio : pandas.DataFrame
        One lineup per row; leading columns are player names (CPT then FLEX
        slots for Showdown), plus 'salary', 'median', 'Own' metric columns.
    maps_dict : dict
        Ownership lookups: 'own_map', 'cpt_own_map', 'own_percent_rank'
        (player name -> ownership percentage / percentile rank).
    site_var : str
        'Fanduel' or 'Draftkings' (selects 60k vs 50k salary-cap constants).
    type_var : str
        'Showdown' or 'Classic'.
    Contest_Size : int
        Number of entries in the contest.
    strength_var : str
        'Weak', 'Average', or 'Sharp' field-strength adjustment.

    Returns
    -------
    pandas.DataFrame
        The portfolio with 'Dupes', 'Finish_percentile', 'Win%',
        'Lineup Edge' added and helper columns removed.

    NOTE(review): if strength_var is not one of the three expected values,
    dupes_multiplier/percentile_multiplier are never bound and a
    NameError is raised later — consider an explicit else/raise.
    """
    # Field-strength scaling: sharper fields duplicate chalk more and
    # push finishes toward worse percentiles.
    if strength_var == 'Weak':
        dupes_multiplier = .75
        percentile_multiplier = .90
    elif strength_var == 'Average':
        dupes_multiplier = 1.00
        percentile_multiplier = 1.00
    elif strength_var == 'Sharp':
        dupes_multiplier = 1.25
        percentile_multiplier = 1.10
    # Ownership maps store percentages; convert to fractions here.
    max_ownership = max(maps_dict['own_map'].values()) / 100
    # NOTE(review): average_ownership is computed but never used below.
    average_ownership = np.mean(list(maps_dict['own_map'].values())) / 100
    if site_var == 'Fanduel':
        if type_var == 'Showdown':
            # Fanduel showdown: 1 CPT + 4 FLEX slots (columns 0-4).
            dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank']
            own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own']
            # NOTE(review): 'own_ratio' appears twice in this list; harmless
            # for drop(columns=...) but likely a copy/paste slip.
            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
            # Pool all FLEX ownerships so ranks share one scale across slots.
            flex_ownerships = pd.concat([
                portfolio.iloc[:,1].map(maps_dict['own_map']),
                portfolio.iloc[:,2].map(maps_dict['own_map']),
                portfolio.iloc[:,3].map(maps_dict['own_map']),
                portfolio.iloc[:,4].map(maps_dict['own_map'])
            ])
            flex_rank = flex_ownerships.rank(pct=True)

            # Assign ranks back to individual columns using the same rank scale
            portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
            portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])

            # Per-slot ownership as fractions (maps hold percentages).
            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100

            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)

            # Calculate dupes formula
            # NOTE(review): the salary-adjustment terms use the Fanduel 60000
            # cap, but (60000 - portfolio['Own']) mixes the cap with an
            # ownership column — confirm intent vs portfolio['salary'].
            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (60000 - portfolio['Own'])) / 100) - ((60000 - portfolio['salary']) / 100)
            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier

            # Round and handle negative values
            portfolio['Dupes'] = np.where(
                np.round(portfolio['dupes_calc'], 0) <= 0,
                0,
                np.round(portfolio['dupes_calc'], 0) - 1
            )
        if type_var == 'Classic':
            # Classic: every non-metric column is a player slot.
            num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
            dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
            own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
            for i in range(1, num_players + 1):
                portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100

            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)

            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (60000 - portfolio['Own'])) / 100) - ((60000 - portfolio['salary']) / 100)
            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier
            # Round and handle negative values
            portfolio['Dupes'] = np.where(
                np.round(portfolio['dupes_calc'], 0) <= 0,
                0,
                np.round(portfolio['dupes_calc'], 0) - 1
            )

    elif site_var == 'Draftkings':
        if type_var == 'Showdown':
            # DK showdown: 1 CPT + 5 FLEX slots (columns 0-5).
            dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
            own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
            # Pool all FLEX ownerships so ranks share one scale across slots.
            flex_ownerships = pd.concat([
                portfolio.iloc[:,1].map(maps_dict['own_map']),
                portfolio.iloc[:,2].map(maps_dict['own_map']),
                portfolio.iloc[:,3].map(maps_dict['own_map']),
                portfolio.iloc[:,4].map(maps_dict['own_map']),
                portfolio.iloc[:,5].map(maps_dict['own_map'])
            ])
            flex_rank = flex_ownerships.rank(pct=True)

            # Assign ranks back to individual columns using the same rank scale
            portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
            portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])

            # Per-slot ownership as fractions (maps hold percentages).
            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100
            portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']) / 100

            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)

            # Calculate dupes formula
            # NOTE(review): same caveat as the Fanduel branch — confirm the
            # (50000 - portfolio['Own']) term; DK cap is 50000 here.
            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (50000 - portfolio['Own'])) / 100) - ((50000 - portfolio['salary']) / 100)
            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier

            # Round and handle negative values
            portfolio['Dupes'] = np.where(
                np.round(portfolio['dupes_calc'], 0) <= 0,
                0,
                np.round(portfolio['dupes_calc'], 0) - 1
            )
        if type_var == 'Classic':
            # Classic: every non-metric column is a player slot.
            num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
            dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
            own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
            for i in range(1, num_players + 1):
                portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100

            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)

            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (50000 - portfolio['Own'])) / 100) - ((50000 - portfolio['salary']) / 100)
            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier
            # Round and handle negative values
            portfolio['Dupes'] = np.where(
                np.round(portfolio['dupes_calc'], 0) <= 0,
                0,
                np.round(portfolio['dupes_calc'], 0) - 1
            )

    # --- Common tail: ratios, percentiles, win%, edge --------------------
    portfolio['Dupes'] = np.round(portfolio['Dupes'], 0)
    # When a lineup contains the single highest-owned player, keep the full
    # ownership sum; otherwise discount by the max ownership.
    portfolio['own_ratio'] = np.where(
        portfolio[own_columns].isin([max_ownership]).any(axis=1),
        portfolio['own_sum'] / portfolio['own_average'],
        (portfolio['own_sum'] - max_ownership) / portfolio['own_average']
    )
    percentile_cut_scalar = portfolio['median'].max() # Get scalar value
    if type_var == 'Classic':
        own_ratio_nerf = 2
    elif type_var == 'Showdown':
        own_ratio_nerf = 1.5
    # Floor the finish percentile at .0005 (best-case cap).
    portfolio['Finish_percentile'] = portfolio.apply(
        lambda row: .0005 if (row['own_ratio'] - own_ratio_nerf) / ((10 * (row['median'] / percentile_cut_scalar)) / 2) < .0005
        else (row['own_ratio'] - own_ratio_nerf) / ((10 * (row['median'] / percentile_cut_scalar)) / 2),
        axis=1
    )

    # Win% scaled against a +/-10-point band around the field's best median.
    portfolio['Ref_Proj'] = portfolio['median'].max()
    portfolio['Max_Proj'] = portfolio['Ref_Proj'] + 10
    portfolio['Min_Proj'] = portfolio['Ref_Proj'] - 10
    portfolio['Avg_Ref'] = (portfolio['Max_Proj'] + portfolio['Min_Proj']) / 2
    portfolio['Win%'] = (((portfolio['median'] / portfolio['Avg_Ref']) - (0.1 + ((portfolio['Ref_Proj'] - portfolio['median'])/100))) / (Contest_Size / 1000)) / 10
    # Cap the best lineup at 5x the uniform 1/Contest_Size win chance.
    max_allowed_win = (1 / Contest_Size) * 5
    portfolio['Win%'] = portfolio['Win%'] / portfolio['Win%'].max() * max_allowed_win

    portfolio['Finish_percentile'] = portfolio['Finish_percentile'] + .005 + (.005 * (Contest_Size / 10000))
    portfolio['Finish_percentile'] = portfolio['Finish_percentile'] * percentile_multiplier
    portfolio['Win%'] = portfolio['Win%'] * (1 - portfolio['Finish_percentile'])

    # Reward low-owned (<10%) players: divide percentile by their count.
    portfolio['low_own_count'] = portfolio[own_columns].apply(lambda row: (row < 0.10).sum(), axis=1)
    portfolio['Finish_percentile'] = portfolio.apply(lambda row: row['Finish_percentile'] if row['low_own_count'] <= 0 else row['Finish_percentile'] / row['low_own_count'], axis=1)
    portfolio['Lineup Edge'] = portfolio['Win%'] * ((.5 - portfolio['Finish_percentile']) * (Contest_Size / 2.5))
    # Expected duplicates split the prize equity.
    portfolio['Lineup Edge'] = portfolio.apply(lambda row: row['Lineup Edge'] / (row['Dupes'] + 1) if row['Dupes'] > 0 else row['Lineup Edge'], axis=1)
    # Center edge around zero so it reads as relative advantage.
    portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
    # Drop helper columns before handing the frame back to the UI.
    portfolio = portfolio.drop(columns=dup_count_columns)
    portfolio = portfolio.drop(columns=own_columns)
    portfolio = portfolio.drop(columns=calc_columns)

    return portfolio