James McCool committed on
Commit
7400821
·
1 Parent(s): 0f5f58e

Implement player name standardization in app.py to improve data consistency across simulations. Added a new function, `standardize_name`, that removes common suffixes (Jr., Sr., II, III, IV) from player names, and updated the relevant mappings and lookups to apply this standardization. This change improves the accuracy of player data handling for contest simulations on DraftKings and FanDuel.

Browse files
Files changed (1) hide show
  1. app.py +49 -38
app.py CHANGED
@@ -192,6 +192,15 @@ def init_baselines(sport):
192
 
193
  return dk_raw, fd_raw
194
 
 
 
 
 
 
 
 
 
 
195
  @st.cache_data
196
  def convert_df(array):
197
  array = pd.DataFrame(array, columns=column_names)
@@ -216,12 +225,14 @@ def sim_contest(Sim_size, seed_frame, maps_dict, sharp_split, Contest_Size):
216
  SimVar = 1
217
  Sim_Winners = []
218
  fp_array = seed_frame[:sharp_split, :]
 
 
219
 
220
  # Pre-vectorize functions
221
- vec_projection_map = np.vectorize(maps_dict['Projection_map'].__getitem__)
222
- vec_cpt_projection_map = np.vectorize(maps_dict['cpt_projection_map'].__getitem__)
223
- vec_stdev_map = np.vectorize(maps_dict['STDev_map'].__getitem__)
224
- vec_cpt_stdev_map = np.vectorize(maps_dict['cpt_STDev_map'].__getitem__)
225
 
226
  st.write('Simulating contest on frames')
227
 
@@ -432,15 +443,15 @@ with tab1:
432
  if st.button("Run Contest Sim"):
433
  if 'working_seed' in st.session_state:
434
  maps_dict = {
435
- 'Projection_map':dict(zip(raw_baselines.Player,raw_baselines.Median)),
436
- 'cpt_projection_map':dict(zip(raw_baselines.Player,raw_baselines.cpt_Median)),
437
- 'Salary_map':dict(zip(raw_baselines.Player,raw_baselines.Salary)),
438
- 'Pos_map':dict(zip(raw_baselines.Player,raw_baselines.Position)),
439
- 'Own_map':dict(zip(raw_baselines.Player,raw_baselines['Own'])),
440
- 'cpt_Own_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_Own'])),
441
- 'Team_map':dict(zip(raw_baselines.Player,raw_baselines.Team)),
442
- 'STDev_map':dict(zip(raw_baselines.Player,raw_baselines.STDev)),
443
- 'cpt_STDev_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_STDev']))
444
  }
445
  Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
446
  Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
@@ -473,15 +484,15 @@ with tab1:
473
  elif sim_site_var1 == 'Fanduel':
474
  st.session_state.working_seed = FD_seed.copy()
475
  maps_dict = {
476
- 'Projection_map':dict(zip(raw_baselines.Player,raw_baselines.Median)),
477
- 'cpt_projection_map':dict(zip(raw_baselines.Player,raw_baselines.cpt_Median)),
478
- 'Salary_map':dict(zip(raw_baselines.Player,raw_baselines.Salary)),
479
- 'Pos_map':dict(zip(raw_baselines.Player,raw_baselines.Position)),
480
- 'Own_map':dict(zip(raw_baselines.Player,raw_baselines['Own'])),
481
- 'cpt_Own_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_Own'])),
482
- 'Team_map':dict(zip(raw_baselines.Player,raw_baselines.Team)),
483
- 'STDev_map':dict(zip(raw_baselines.Player,raw_baselines.STDev)),
484
- 'cpt_STDev_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_STDev']))
485
  }
486
  Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
487
  Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
@@ -517,18 +528,18 @@ with tab1:
517
  freq_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:5].values, return_counts=True)),
518
  columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
519
  freq_working['Freq'] = freq_working['Freq'].astype(int)
520
- freq_working['Position'] = freq_working['Player'].map(maps_dict['Pos_map'])
521
  if sim_site_var1 == 'Draftkings':
522
  if sim_sport_var1 == 'NFL':
523
- freq_working['Salary'] = freq_working['Player'].map(maps_dict['Salary_map']) / 1.5
524
  elif sim_sport_var1 == 'NBA':
525
- freq_working['Salary'] = freq_working['Player'].map(maps_dict['Salary_map'])
526
  elif sim_site_var1 == 'Fanduel':
527
- freq_working['Salary'] = freq_working['Player'].map(maps_dict['Salary_map'])
528
- freq_working['Proj Own'] = freq_working['Player'].map(maps_dict['Own_map']) / 100
529
  freq_working['Exposure'] = freq_working['Freq']/(1000)
530
  freq_working['Edge'] = freq_working['Exposure'] - freq_working['Proj Own']
531
- freq_working['Team'] = freq_working['Player'].map(maps_dict['Team_map'])
532
  st.session_state.player_freq = freq_working.copy()
533
 
534
  if sim_site_var1 == 'Draftkings':
@@ -538,15 +549,15 @@ with tab1:
538
  cpt_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:1].values, return_counts=True)),
539
  columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
540
  cpt_working['Freq'] = cpt_working['Freq'].astype(int)
541
- cpt_working['Position'] = cpt_working['Player'].map(maps_dict['Pos_map'])
542
  if sim_sport_var1 == 'NFL':
543
- cpt_working['Salary'] = cpt_working['Player'].map(maps_dict['Salary_map'])
544
  elif sim_sport_var1 == 'NBA':
545
- cpt_working['Salary'] = cpt_working['Player'].map(maps_dict['Salary_map']) * 1.5
546
- cpt_working['Proj Own'] = cpt_working['Player'].map(maps_dict['cpt_Own_map']) / 100
547
  cpt_working['Exposure'] = cpt_working['Freq']/(1000)
548
  cpt_working['Edge'] = cpt_working['Exposure'] - cpt_working['Proj Own']
549
- cpt_working['Team'] = cpt_working['Player'].map(maps_dict['Team_map'])
550
  st.session_state.sp_freq = cpt_working.copy()
551
 
552
  if sim_site_var1 == 'Draftkings':
@@ -561,15 +572,15 @@ with tab1:
561
  flex_working['Position'] = flex_working['Player'].map(maps_dict['Pos_map'])
562
  if sim_site_var1 == 'Draftkings':
563
  if sim_sport_var1 == 'NFL':
564
- flex_working['Salary'] = flex_working['Player'].map(maps_dict['Salary_map']) / 1.5
565
  elif sim_sport_var1 == 'NBA':
566
- flex_working['Salary'] = flex_working['Player'].map(maps_dict['Salary_map'])
567
  elif sim_site_var1 == 'Fanduel':
568
- flex_working['Salary'] = flex_working['Player'].map(maps_dict['Salary_map'])
569
- flex_working['Proj Own'] = (flex_working['Player'].map(maps_dict['Own_map']) / 100) - (flex_working['Player'].map(maps_dict['cpt_Own_map']) / 100)
570
  flex_working['Exposure'] = flex_working['Freq']/(1000)
571
  flex_working['Edge'] = flex_working['Exposure'] - flex_working['Proj Own']
572
- flex_working['Team'] = flex_working['Player'].map(maps_dict['Team_map'])
573
  st.session_state.flex_freq = flex_working.copy()
574
 
575
  if sim_site_var1 == 'Draftkings':
 
192
 
193
  return dk_raw, fd_raw
194
 
195
+ @st.cache_data
196
+ def standardize_name(name):
197
+ # Remove common suffixes and standardize
198
+ suffixes = [' Jr.', ' Jr', ' Sr.', ' Sr', ' III', ' II', ' IV']
199
+ name = str(name) # Ensure name is a string
200
+ for suffix in suffixes:
201
+ name = name.replace(suffix, '')
202
+ return name.strip()
203
+
204
  @st.cache_data
205
  def convert_df(array):
206
  array = pd.DataFrame(array, columns=column_names)
 
225
  SimVar = 1
226
  Sim_Winners = []
227
  fp_array = seed_frame[:sharp_split, :]
228
+
229
+ vec_standardize = np.vectorize(standardize_name)
230
 
231
  # Pre-vectorize functions
232
+ vec_projection_map = lambda x: np.vectorize(maps_dict['Projection_map'].__getitem__)(vec_standardize(x))
233
+ vec_cpt_projection_map = lambda x: np.vectorize(maps_dict['cpt_projection_map'].__getitem__)(vec_standardize(x))
234
+ vec_stdev_map = lambda x: np.vectorize(maps_dict['STDev_map'].__getitem__)(vec_standardize(x))
235
+ vec_cpt_stdev_map = lambda x: np.vectorize(maps_dict['cpt_STDev_map'].__getitem__)(vec_standardize(x))
236
 
237
  st.write('Simulating contest on frames')
238
 
 
443
  if st.button("Run Contest Sim"):
444
  if 'working_seed' in st.session_state:
445
  maps_dict = {
446
+ 'Projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Median)),
447
+ 'cpt_projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.cpt_Median)),
448
+ 'Salary_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Salary)),
449
+ 'Pos_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Position)),
450
+ 'Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['Own'])),
451
+ 'cpt_Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_Own'])),
452
+ 'Team_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Team)),
453
+ 'STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.STDev)),
454
+ 'cpt_STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_STDev']))
455
  }
456
  Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
457
  Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
 
484
  elif sim_site_var1 == 'Fanduel':
485
  st.session_state.working_seed = FD_seed.copy()
486
  maps_dict = {
487
+ 'Projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Median)),
488
+ 'cpt_projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.cpt_Median)),
489
+ 'Salary_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Salary)),
490
+ 'Pos_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Position)),
491
+ 'Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['Own'])),
492
+ 'cpt_Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_Own'])),
493
+ 'Team_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Team)),
494
+ 'STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.STDev)),
495
+ 'cpt_STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_STDev']))
496
  }
497
  Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
498
  Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
 
528
  freq_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:5].values, return_counts=True)),
529
  columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
530
  freq_working['Freq'] = freq_working['Freq'].astype(int)
531
+ freq_working['Position'] = freq_working['Player'].apply(standardize_name).apply(standardize_name).map(maps_dict['Pos_map'])
532
  if sim_site_var1 == 'Draftkings':
533
  if sim_sport_var1 == 'NFL':
534
+ freq_working['Salary'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Salary_map']) / 1.5
535
  elif sim_sport_var1 == 'NBA':
536
+ freq_working['Salary'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
537
  elif sim_site_var1 == 'Fanduel':
538
+ freq_working['Salary'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
539
+ freq_working['Proj Own'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Own_map']) / 100
540
  freq_working['Exposure'] = freq_working['Freq']/(1000)
541
  freq_working['Edge'] = freq_working['Exposure'] - freq_working['Proj Own']
542
+ freq_working['Team'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Team_map'])
543
  st.session_state.player_freq = freq_working.copy()
544
 
545
  if sim_site_var1 == 'Draftkings':
 
549
  cpt_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:1].values, return_counts=True)),
550
  columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
551
  cpt_working['Freq'] = cpt_working['Freq'].astype(int)
552
+ cpt_working['Position'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Pos_map'])
553
  if sim_sport_var1 == 'NFL':
554
+ cpt_working['Salary'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
555
  elif sim_sport_var1 == 'NBA':
556
+ cpt_working['Salary'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Salary_map']) * 1.5
557
+ cpt_working['Proj Own'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['cpt_Own_map']) / 100
558
  cpt_working['Exposure'] = cpt_working['Freq']/(1000)
559
  cpt_working['Edge'] = cpt_working['Exposure'] - cpt_working['Proj Own']
560
+ cpt_working['Team'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Team_map'])
561
  st.session_state.sp_freq = cpt_working.copy()
562
 
563
  if sim_site_var1 == 'Draftkings':
 
572
  flex_working['Position'] = flex_working['Player'].map(maps_dict['Pos_map'])
573
  if sim_site_var1 == 'Draftkings':
574
  if sim_sport_var1 == 'NFL':
575
+ flex_working['Salary'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Salary_map']) / 1.5
576
  elif sim_sport_var1 == 'NBA':
577
+ flex_working['Salary'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
578
  elif sim_site_var1 == 'Fanduel':
579
+ flex_working['Salary'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
580
+ flex_working['Proj Own'] = (flex_working['Player'].apply(standardize_name).map(maps_dict['Own_map']) / 100) - (flex_working['Player'].apply(standardize_name).map(maps_dict['cpt_Own_map']) / 100)
581
  flex_working['Exposure'] = flex_working['Freq']/(1000)
582
  flex_working['Edge'] = flex_working['Exposure'] - flex_working['Proj Own']
583
+ flex_working['Team'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Team_map'])
584
  st.session_state.flex_freq = flex_working.copy()
585
 
586
  if sim_site_var1 == 'Draftkings':