Spaces:
Sleeping
Sleeping
James McCool
committed on
Commit
·
7400821
1
Parent(s):
0f5f58e
Implement player name standardization in app.py to enhance data consistency across simulations. Added a new function to remove common suffixes from player names and updated relevant mappings to apply this standardization. This change improves the accuracy of player data handling for contest simulations on DraftKings and FanDuel.
Browse files
app.py
CHANGED
@@ -192,6 +192,15 @@ def init_baselines(sport):
|
|
192 |
|
193 |
return dk_raw, fd_raw
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
@st.cache_data
|
196 |
def convert_df(array):
|
197 |
array = pd.DataFrame(array, columns=column_names)
|
@@ -216,12 +225,14 @@ def sim_contest(Sim_size, seed_frame, maps_dict, sharp_split, Contest_Size):
|
|
216 |
SimVar = 1
|
217 |
Sim_Winners = []
|
218 |
fp_array = seed_frame[:sharp_split, :]
|
|
|
|
|
219 |
|
220 |
# Pre-vectorize functions
|
221 |
-
vec_projection_map = np.vectorize(maps_dict['Projection_map'].__getitem__)
|
222 |
-
vec_cpt_projection_map = np.vectorize(maps_dict['cpt_projection_map'].__getitem__)
|
223 |
-
vec_stdev_map = np.vectorize(maps_dict['STDev_map'].__getitem__)
|
224 |
-
vec_cpt_stdev_map = np.vectorize(maps_dict['cpt_STDev_map'].__getitem__)
|
225 |
|
226 |
st.write('Simulating contest on frames')
|
227 |
|
@@ -432,15 +443,15 @@ with tab1:
|
|
432 |
if st.button("Run Contest Sim"):
|
433 |
if 'working_seed' in st.session_state:
|
434 |
maps_dict = {
|
435 |
-
'Projection_map':dict(zip(raw_baselines.Player,raw_baselines.Median)),
|
436 |
-
'cpt_projection_map':dict(zip(raw_baselines.Player,raw_baselines.cpt_Median)),
|
437 |
-
'Salary_map':dict(zip(raw_baselines.Player,raw_baselines.Salary)),
|
438 |
-
'Pos_map':dict(zip(raw_baselines.Player,raw_baselines.Position)),
|
439 |
-
'Own_map':dict(zip(raw_baselines.Player,raw_baselines['Own'])),
|
440 |
-
'cpt_Own_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_Own'])),
|
441 |
-
'Team_map':dict(zip(raw_baselines.Player,raw_baselines.Team)),
|
442 |
-
'STDev_map':dict(zip(raw_baselines.Player,raw_baselines.STDev)),
|
443 |
-
'cpt_STDev_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_STDev']))
|
444 |
}
|
445 |
Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
|
446 |
Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
|
@@ -473,15 +484,15 @@ with tab1:
|
|
473 |
elif sim_site_var1 == 'Fanduel':
|
474 |
st.session_state.working_seed = FD_seed.copy()
|
475 |
maps_dict = {
|
476 |
-
'Projection_map':dict(zip(raw_baselines.Player,raw_baselines.Median)),
|
477 |
-
'cpt_projection_map':dict(zip(raw_baselines.Player,raw_baselines.cpt_Median)),
|
478 |
-
'Salary_map':dict(zip(raw_baselines.Player,raw_baselines.Salary)),
|
479 |
-
'Pos_map':dict(zip(raw_baselines.Player,raw_baselines.Position)),
|
480 |
-
'Own_map':dict(zip(raw_baselines.Player,raw_baselines['Own'])),
|
481 |
-
'cpt_Own_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_Own'])),
|
482 |
-
'Team_map':dict(zip(raw_baselines.Player,raw_baselines.Team)),
|
483 |
-
'STDev_map':dict(zip(raw_baselines.Player,raw_baselines.STDev)),
|
484 |
-
'cpt_STDev_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_STDev']))
|
485 |
}
|
486 |
Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
|
487 |
Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
|
@@ -517,18 +528,18 @@ with tab1:
|
|
517 |
freq_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:5].values, return_counts=True)),
|
518 |
columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
|
519 |
freq_working['Freq'] = freq_working['Freq'].astype(int)
|
520 |
-
freq_working['Position'] = freq_working['Player'].map(maps_dict['Pos_map'])
|
521 |
if sim_site_var1 == 'Draftkings':
|
522 |
if sim_sport_var1 == 'NFL':
|
523 |
-
freq_working['Salary'] = freq_working['Player'].map(maps_dict['Salary_map']) / 1.5
|
524 |
elif sim_sport_var1 == 'NBA':
|
525 |
-
freq_working['Salary'] = freq_working['Player'].map(maps_dict['Salary_map'])
|
526 |
elif sim_site_var1 == 'Fanduel':
|
527 |
-
freq_working['Salary'] = freq_working['Player'].map(maps_dict['Salary_map'])
|
528 |
-
freq_working['Proj Own'] = freq_working['Player'].map(maps_dict['Own_map']) / 100
|
529 |
freq_working['Exposure'] = freq_working['Freq']/(1000)
|
530 |
freq_working['Edge'] = freq_working['Exposure'] - freq_working['Proj Own']
|
531 |
-
freq_working['Team'] = freq_working['Player'].map(maps_dict['Team_map'])
|
532 |
st.session_state.player_freq = freq_working.copy()
|
533 |
|
534 |
if sim_site_var1 == 'Draftkings':
|
@@ -538,15 +549,15 @@ with tab1:
|
|
538 |
cpt_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:1].values, return_counts=True)),
|
539 |
columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
|
540 |
cpt_working['Freq'] = cpt_working['Freq'].astype(int)
|
541 |
-
cpt_working['Position'] = cpt_working['Player'].map(maps_dict['Pos_map'])
|
542 |
if sim_sport_var1 == 'NFL':
|
543 |
-
cpt_working['Salary'] = cpt_working['Player'].map(maps_dict['Salary_map'])
|
544 |
elif sim_sport_var1 == 'NBA':
|
545 |
-
cpt_working['Salary'] = cpt_working['Player'].map(maps_dict['Salary_map']) * 1.5
|
546 |
-
cpt_working['Proj Own'] = cpt_working['Player'].map(maps_dict['cpt_Own_map']) / 100
|
547 |
cpt_working['Exposure'] = cpt_working['Freq']/(1000)
|
548 |
cpt_working['Edge'] = cpt_working['Exposure'] - cpt_working['Proj Own']
|
549 |
-
cpt_working['Team'] = cpt_working['Player'].map(maps_dict['Team_map'])
|
550 |
st.session_state.sp_freq = cpt_working.copy()
|
551 |
|
552 |
if sim_site_var1 == 'Draftkings':
|
@@ -561,15 +572,15 @@ with tab1:
|
|
561 |
flex_working['Position'] = flex_working['Player'].map(maps_dict['Pos_map'])
|
562 |
if sim_site_var1 == 'Draftkings':
|
563 |
if sim_sport_var1 == 'NFL':
|
564 |
-
flex_working['Salary'] = flex_working['Player'].map(maps_dict['Salary_map']) / 1.5
|
565 |
elif sim_sport_var1 == 'NBA':
|
566 |
-
flex_working['Salary'] = flex_working['Player'].map(maps_dict['Salary_map'])
|
567 |
elif sim_site_var1 == 'Fanduel':
|
568 |
-
flex_working['Salary'] = flex_working['Player'].map(maps_dict['Salary_map'])
|
569 |
-
flex_working['Proj Own'] = (flex_working['Player'].map(maps_dict['Own_map']) / 100) - (flex_working['Player'].map(maps_dict['cpt_Own_map']) / 100)
|
570 |
flex_working['Exposure'] = flex_working['Freq']/(1000)
|
571 |
flex_working['Edge'] = flex_working['Exposure'] - flex_working['Proj Own']
|
572 |
-
flex_working['Team'] = flex_working['Player'].map(maps_dict['Team_map'])
|
573 |
st.session_state.flex_freq = flex_working.copy()
|
574 |
|
575 |
if sim_site_var1 == 'Draftkings':
|
|
|
192 |
|
193 |
return dk_raw, fd_raw
|
194 |
|
195 |
+
@st.cache_data
def standardize_name(name):
    """Return a player name with a trailing generational suffix removed.

    The value is coerced to ``str``, surrounding whitespace is trimmed, and
    at most one suffix (``Jr.``, ``Jr``, ``Sr.``, ``Sr``, ``III``, ``II`` or
    ``IV``) is dropped when it is the final space-separated token.

    Args:
        name: Raw player name; any object is accepted and converted with
            ``str()``.

    Returns:
        The cleaned name as a string with no leading/trailing whitespace.
    """
    suffixes = (' Jr.', ' Jr', ' Sr.', ' Sr', ' III', ' II', ' IV')
    name = str(name).strip()  # Ensure name is a string and trim padding first
    for suffix in suffixes:
        # Only match at the very end of the name. A plain str.replace would
        # also cut matching text out of the middle of a name (for example
        # ' Sr' inside 'Ana Srna'), silently producing a wrong lookup key.
        if name.endswith(suffix):
            name = name[:-len(suffix)]
            break
    return name.strip()
|
203 |
+
|
204 |
@st.cache_data
|
205 |
def convert_df(array):
|
206 |
array = pd.DataFrame(array, columns=column_names)
|
|
|
225 |
SimVar = 1
|
226 |
Sim_Winners = []
|
227 |
fp_array = seed_frame[:sharp_split, :]
|
228 |
+
|
229 |
+
vec_standardize = np.vectorize(standardize_name)
|
230 |
|
231 |
# Pre-vectorize functions
|
232 |
+
vec_projection_map = lambda x: np.vectorize(maps_dict['Projection_map'].__getitem__)(vec_standardize(x))
|
233 |
+
vec_cpt_projection_map = lambda x: np.vectorize(maps_dict['cpt_projection_map'].__getitem__)(vec_standardize(x))
|
234 |
+
vec_stdev_map = lambda x: np.vectorize(maps_dict['STDev_map'].__getitem__)(vec_standardize(x))
|
235 |
+
vec_cpt_stdev_map = lambda x: np.vectorize(maps_dict['cpt_STDev_map'].__getitem__)(vec_standardize(x))
|
236 |
|
237 |
st.write('Simulating contest on frames')
|
238 |
|
|
|
443 |
if st.button("Run Contest Sim"):
|
444 |
if 'working_seed' in st.session_state:
|
445 |
maps_dict = {
|
446 |
+
'Projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Median)),
|
447 |
+
'cpt_projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.cpt_Median)),
|
448 |
+
'Salary_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Salary)),
|
449 |
+
'Pos_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Position)),
|
450 |
+
'Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['Own'])),
|
451 |
+
'cpt_Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_Own'])),
|
452 |
+
'Team_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Team)),
|
453 |
+
'STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.STDev)),
|
454 |
+
'cpt_STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_STDev']))
|
455 |
}
|
456 |
Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
|
457 |
Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
|
|
|
484 |
elif sim_site_var1 == 'Fanduel':
|
485 |
st.session_state.working_seed = FD_seed.copy()
|
486 |
maps_dict = {
|
487 |
+
'Projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Median)),
|
488 |
+
'cpt_projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.cpt_Median)),
|
489 |
+
'Salary_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Salary)),
|
490 |
+
'Pos_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Position)),
|
491 |
+
'Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['Own'])),
|
492 |
+
'cpt_Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_Own'])),
|
493 |
+
'Team_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Team)),
|
494 |
+
'STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.STDev)),
|
495 |
+
'cpt_STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_STDev']))
|
496 |
}
|
497 |
Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
|
498 |
Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
|
|
|
528 |
freq_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:5].values, return_counts=True)),
|
529 |
columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
|
530 |
freq_working['Freq'] = freq_working['Freq'].astype(int)
|
531 |
+
# Pos_map is keyed by standardized player names, so normalize once before the lookup.
freq_working['Position'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Pos_map'])
|
532 |
if sim_site_var1 == 'Draftkings':
|
533 |
if sim_sport_var1 == 'NFL':
|
534 |
+
freq_working['Salary'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Salary_map']) / 1.5
|
535 |
elif sim_sport_var1 == 'NBA':
|
536 |
+
freq_working['Salary'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
|
537 |
elif sim_site_var1 == 'Fanduel':
|
538 |
+
freq_working['Salary'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
|
539 |
+
freq_working['Proj Own'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Own_map']) / 100
|
540 |
freq_working['Exposure'] = freq_working['Freq']/(1000)
|
541 |
freq_working['Edge'] = freq_working['Exposure'] - freq_working['Proj Own']
|
542 |
+
freq_working['Team'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Team_map'])
|
543 |
st.session_state.player_freq = freq_working.copy()
|
544 |
|
545 |
if sim_site_var1 == 'Draftkings':
|
|
|
549 |
cpt_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:1].values, return_counts=True)),
|
550 |
columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
|
551 |
cpt_working['Freq'] = cpt_working['Freq'].astype(int)
|
552 |
+
cpt_working['Position'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Pos_map'])
|
553 |
if sim_sport_var1 == 'NFL':
|
554 |
+
cpt_working['Salary'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
|
555 |
elif sim_sport_var1 == 'NBA':
|
556 |
+
cpt_working['Salary'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Salary_map']) * 1.5
|
557 |
+
cpt_working['Proj Own'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['cpt_Own_map']) / 100
|
558 |
cpt_working['Exposure'] = cpt_working['Freq']/(1000)
|
559 |
cpt_working['Edge'] = cpt_working['Exposure'] - cpt_working['Proj Own']
|
560 |
+
cpt_working['Team'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Team_map'])
|
561 |
st.session_state.sp_freq = cpt_working.copy()
|
562 |
|
563 |
if sim_site_var1 == 'Draftkings':
|
|
|
572 |
# Pos_map is keyed by standardized player names; looking up the raw name
# would leave Position as NaN for players whose name carries a suffix.
flex_working['Position'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Pos_map'])
|
573 |
if sim_site_var1 == 'Draftkings':
|
574 |
if sim_sport_var1 == 'NFL':
|
575 |
+
flex_working['Salary'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Salary_map']) / 1.5
|
576 |
elif sim_sport_var1 == 'NBA':
|
577 |
+
flex_working['Salary'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
|
578 |
elif sim_site_var1 == 'Fanduel':
|
579 |
+
flex_working['Salary'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
|
580 |
+
flex_working['Proj Own'] = (flex_working['Player'].apply(standardize_name).map(maps_dict['Own_map']) / 100) - (flex_working['Player'].apply(standardize_name).map(maps_dict['cpt_Own_map']) / 100)
|
581 |
flex_working['Exposure'] = flex_working['Freq']/(1000)
|
582 |
flex_working['Edge'] = flex_working['Exposure'] - flex_working['Proj Own']
|
583 |
+
flex_working['Team'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Team_map'])
|
584 |
st.session_state.flex_freq = flex_working.copy()
|
585 |
|
586 |
if sim_site_var1 == 'Draftkings':
|