James McCool committed on
Commit
297c202
·
1 Parent(s): 6bcbd26

Enhance seed frame initialization with player validation

Browse files

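Updated the seed frame initialization functions for DraftKings and FanDuel (main and secondary slates) to validate lineups against a set of valid players. This ensures that only players present in the Range of Outcomes collection for the matching site and slate are considered, improving data integrity. Validation now runs inside the initialization functions, so the separate validate_lineup_players calls in the tab-level code were removed. Additionally, refactored name mapping to use DataFrame.apply in place of the per-column loop for better performance and clarity.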

Files changed (1) hide show
  1. app.py +49 -25
app.py CHANGED
@@ -59,6 +59,11 @@ def init_DK_seed_frames(load_size):
59
  cursor = collection.find()
60
  raw_data = pd.DataFrame(list(cursor))
61
  names_dict = dict(zip(raw_data['key'], raw_data['value']))
 
 
 
 
 
62
 
63
  collection = db["DK_NBA_seed_frame"]
64
  cursor = collection.find().limit(load_size)
@@ -66,9 +71,13 @@ def init_DK_seed_frames(load_size):
66
  raw_display = pd.DataFrame(list(cursor))
67
  raw_display = raw_display[['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
68
  dict_columns = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX']
69
- for col in dict_columns:
70
- raw_display[col] = raw_display[col].map(names_dict)
71
-
 
 
 
 
72
  raw_display = raw_display.dropna()
73
  DK_seed = raw_display.to_numpy()
74
 
@@ -81,6 +90,11 @@ def init_DK_secondary_seed_frames(load_size):
81
  cursor = collection.find()
82
  raw_data = pd.DataFrame(list(cursor))
83
  names_dict = dict(zip(raw_data['key'], raw_data['value']))
 
 
 
 
 
84
 
85
  collection = db["DK_NBA_Secondary_seed_frame"]
86
  cursor = collection.find().limit(load_size)
@@ -88,9 +102,13 @@ def init_DK_secondary_seed_frames(load_size):
88
  raw_display = pd.DataFrame(list(cursor))
89
  raw_display = raw_display[['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
90
  dict_columns = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX']
91
- for col in dict_columns:
92
- raw_display[col] = raw_display[col].map(names_dict)
93
-
 
 
 
 
94
  raw_display = raw_display.dropna()
95
  DK_seed = raw_display.to_numpy()
96
 
@@ -103,6 +121,11 @@ def init_FD_seed_frames(load_size):
103
  cursor = collection.find()
104
  raw_data = pd.DataFrame(list(cursor))
105
  names_dict = dict(zip(raw_data['key'], raw_data['value']))
 
 
 
 
 
106
 
107
  collection = db["FD_NBA_seed_frame"]
108
  cursor = collection.find().limit(load_size)
@@ -110,9 +133,13 @@ def init_FD_seed_frames(load_size):
110
  raw_display = pd.DataFrame(list(cursor))
111
  raw_display = raw_display[['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
112
  dict_columns = ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1']
113
- for col in dict_columns:
114
- raw_display[col] = raw_display[col].map(names_dict)
115
-
 
 
 
 
116
  raw_display = raw_display.dropna()
117
  FD_seed = raw_display.to_numpy()
118
 
@@ -125,6 +152,11 @@ def init_FD_secondary_seed_frames(load_size):
125
  cursor = collection.find()
126
  raw_data = pd.DataFrame(list(cursor))
127
  names_dict = dict(zip(raw_data['key'], raw_data['value']))
 
 
 
 
 
128
 
129
  collection = db["FD_NBA_Secondary_seed_frame"]
130
  cursor = collection.find().limit(load_size)
@@ -132,9 +164,13 @@ def init_FD_secondary_seed_frames(load_size):
132
  raw_display = pd.DataFrame(list(cursor))
133
  raw_display = raw_display[['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
134
  dict_columns = ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1']
135
- for col in dict_columns:
136
- raw_display[col] = raw_display[col].map(names_dict)
137
-
 
 
 
 
138
  raw_display = raw_display.dropna()
139
  FD_seed = raw_display.to_numpy()
140
 
@@ -328,15 +364,11 @@ with tab1:
328
  if sim_slate_var1 == 'Main Slate':
329
  st.session_state.working_seed = init_DK_seed_frames(sharp_split)
330
  dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
331
-
332
- st.session_state.dk_raw = validate_lineup_players(dk_raw, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
333
  raw_baselines = dk_raw
334
  column_names = dk_columns
335
  elif sim_slate_var1 == 'Secondary Slate':
336
  st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split)
337
  dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
338
-
339
- st.session_state.dk_secondary = validate_lineup_players(dk_secondary, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
340
  raw_baselines = dk_secondary
341
  column_names = dk_columns
342
 
@@ -344,15 +376,11 @@ with tab1:
344
  if sim_slate_var1 == 'Main Slate':
345
  st.session_state.working_seed = init_FD_seed_frames(sharp_split)
346
  fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
347
-
348
- st.session_state.fd_raw = validate_lineup_players(fd_raw, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
349
  raw_baselines = fd_raw
350
  column_names = fd_columns
351
  elif sim_slate_var1 == 'Secondary Slate':
352
  st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split)
353
  fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
354
-
355
- st.session_state.fd_secondary = validate_lineup_players(fd_secondary, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
356
  raw_baselines = fd_secondary
357
  column_names = fd_columns
358
 
@@ -694,14 +722,12 @@ with tab2:
694
  st.session_state.working_seed = init_DK_seed_frames(sharp_split_var)
695
  dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
696
 
697
- st.session_state.dk_raw = validate_lineup_players(dk_raw, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
698
  raw_baselines = dk_raw
699
  column_names = dk_columns
700
  elif slate_var1 == 'Secondary Slate':
701
  st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var)
702
  dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
703
 
704
- st.session_state.dk_secondary = validate_lineup_players(dk_secondary, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
705
  raw_baselines = dk_secondary
706
  column_names = dk_columns
707
 
@@ -725,14 +751,12 @@ with tab2:
725
  st.session_state.working_seed = init_FD_seed_frames(sharp_split_var)
726
  fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
727
 
728
- st.session_state.fd_raw = validate_lineup_players(fd_raw, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
729
  raw_baselines = fd_raw
730
  column_names = fd_columns
731
  elif slate_var1 == 'Secondary Slate':
732
  st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var)
733
  fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
734
-
735
- st.session_state.fd_secondary = validate_lineup_players(fd_secondary, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
736
  raw_baselines = fd_secondary
737
  column_names = fd_columns
738
 
 
59
  cursor = collection.find()
60
  raw_data = pd.DataFrame(list(cursor))
61
  names_dict = dict(zip(raw_data['key'], raw_data['value']))
62
+
63
+ # Get the valid players from the Range of Outcomes collection
64
+ collection = db["Player_Range_Of_Outcomes"]
65
+ cursor = collection.find({"site": "Draftkings", "slate": "Main Slate"})
66
+ valid_players = set(pd.DataFrame(list(cursor))['Name'].unique())
67
 
68
  collection = db["DK_NBA_seed_frame"]
69
  cursor = collection.find().limit(load_size)
 
71
  raw_display = pd.DataFrame(list(cursor))
72
  raw_display = raw_display[['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
73
  dict_columns = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX']
74
+ # Map names
75
+ raw_display[dict_columns] = raw_display[dict_columns].apply(lambda x: x.map(names_dict))
76
+
77
+ # Validate lineups against valid players
78
+ raw_display = validate_lineup_players(raw_display, valid_players, dict_columns)
79
+
80
+ # Remove any remaining NaN values
81
  raw_display = raw_display.dropna()
82
  DK_seed = raw_display.to_numpy()
83
 
 
90
  cursor = collection.find()
91
  raw_data = pd.DataFrame(list(cursor))
92
  names_dict = dict(zip(raw_data['key'], raw_data['value']))
93
+
94
+ # Get the valid players from the Range of Outcomes collection
95
+ collection = db["Player_Range_Of_Outcomes"]
96
+ cursor = collection.find({"site": "Draftkings", "slate": "Secondary Slate"})
97
+ valid_players = set(pd.DataFrame(list(cursor))['Name'].unique())
98
 
99
  collection = db["DK_NBA_Secondary_seed_frame"]
100
  cursor = collection.find().limit(load_size)
 
102
  raw_display = pd.DataFrame(list(cursor))
103
  raw_display = raw_display[['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
104
  dict_columns = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX']
105
+ # Map names
106
+ raw_display[dict_columns] = raw_display[dict_columns].apply(lambda x: x.map(names_dict))
107
+
108
+ # Validate lineups against valid players
109
+ raw_display = validate_lineup_players(raw_display, valid_players, dict_columns)
110
+
111
+ # Remove any remaining NaN values
112
  raw_display = raw_display.dropna()
113
  DK_seed = raw_display.to_numpy()
114
 
 
121
  cursor = collection.find()
122
  raw_data = pd.DataFrame(list(cursor))
123
  names_dict = dict(zip(raw_data['key'], raw_data['value']))
124
+
125
+ # Get the valid players from the Range of Outcomes collection
126
+ collection = db["Player_Range_Of_Outcomes"]
127
+ cursor = collection.find({"site": "Fanduel", "slate": "Main Slate"})
128
+ valid_players = set(pd.DataFrame(list(cursor))['Name'].unique())
129
 
130
  collection = db["FD_NBA_seed_frame"]
131
  cursor = collection.find().limit(load_size)
 
133
  raw_display = pd.DataFrame(list(cursor))
134
  raw_display = raw_display[['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
135
  dict_columns = ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1']
136
+ # Map names
137
+ raw_display[dict_columns] = raw_display[dict_columns].apply(lambda x: x.map(names_dict))
138
+
139
+ # Validate lineups against valid players
140
+ raw_display = validate_lineup_players(raw_display, valid_players, dict_columns)
141
+
142
+ # Remove any remaining NaN values
143
  raw_display = raw_display.dropna()
144
  FD_seed = raw_display.to_numpy()
145
 
 
152
  cursor = collection.find()
153
  raw_data = pd.DataFrame(list(cursor))
154
  names_dict = dict(zip(raw_data['key'], raw_data['value']))
155
+
156
+ # Get the valid players from the Range of Outcomes collection
157
+ collection = db["Player_Range_Of_Outcomes"]
158
+ cursor = collection.find({"site": "Fanduel", "slate": "Secondary Slate"})
159
+ valid_players = set(pd.DataFrame(list(cursor))['Name'].unique())
160
 
161
  collection = db["FD_NBA_Secondary_seed_frame"]
162
  cursor = collection.find().limit(load_size)
 
164
  raw_display = pd.DataFrame(list(cursor))
165
  raw_display = raw_display[['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
166
  dict_columns = ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1']
167
+ # Map names
168
+ raw_display[dict_columns] = raw_display[dict_columns].apply(lambda x: x.map(names_dict))
169
+
170
+ # Validate lineups against valid players
171
+ raw_display = validate_lineup_players(raw_display, valid_players, dict_columns)
172
+
173
+ # Remove any remaining NaN values
174
  raw_display = raw_display.dropna()
175
  FD_seed = raw_display.to_numpy()
176
 
 
364
  if sim_slate_var1 == 'Main Slate':
365
  st.session_state.working_seed = init_DK_seed_frames(sharp_split)
366
  dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
 
 
367
  raw_baselines = dk_raw
368
  column_names = dk_columns
369
  elif sim_slate_var1 == 'Secondary Slate':
370
  st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split)
371
  dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
 
 
372
  raw_baselines = dk_secondary
373
  column_names = dk_columns
374
 
 
376
  if sim_slate_var1 == 'Main Slate':
377
  st.session_state.working_seed = init_FD_seed_frames(sharp_split)
378
  fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
 
 
379
  raw_baselines = fd_raw
380
  column_names = fd_columns
381
  elif sim_slate_var1 == 'Secondary Slate':
382
  st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split)
383
  fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
 
 
384
  raw_baselines = fd_secondary
385
  column_names = fd_columns
386
 
 
722
  st.session_state.working_seed = init_DK_seed_frames(sharp_split_var)
723
  dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
724
 
 
725
  raw_baselines = dk_raw
726
  column_names = dk_columns
727
  elif slate_var1 == 'Secondary Slate':
728
  st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var)
729
  dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
730
 
 
731
  raw_baselines = dk_secondary
732
  column_names = dk_columns
733
 
 
751
  st.session_state.working_seed = init_FD_seed_frames(sharp_split_var)
752
  fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
753
 
 
754
  raw_baselines = fd_raw
755
  column_names = fd_columns
756
  elif slate_var1 == 'Secondary Slate':
757
  st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var)
758
  fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
759
+
 
760
  raw_baselines = fd_secondary
761
  column_names = fd_columns
762