Spaces:
Running
Running
James McCool
committed on
Commit
·
6bcbd26
1
Parent(s):
9c5865c
Add player validation function for lineups
Browse files
Introduced a new function, validate_lineup_players, to ensure that all players in specified columns exist within a set of valid players. This function is applied to both DraftKings and FanDuel lineups, enhancing data integrity by filtering out invalid entries before further processing.
app.py
CHANGED
@@ -173,6 +173,22 @@ def init_baselines():
|
|
173 |
|
174 |
return dk_raw, fd_raw, dk_secondary, fd_secondary
|
175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
@st.cache_data
|
177 |
def convert_df(array):
|
178 |
array = pd.DataFrame(array, columns=column_names)
|
@@ -245,6 +261,11 @@ with tab1:
|
|
245 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
246 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
247 |
|
|
|
|
|
|
|
|
|
|
|
248 |
sim_slate_var1 = st.radio("Which data are you loading?", ('Main Slate', 'Secondary Slate'), key='sim_slate_var1')
|
249 |
sim_site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'), key='sim_site_var1')
|
250 |
|
@@ -307,11 +328,15 @@ with tab1:
|
|
307 |
if sim_slate_var1 == 'Main Slate':
|
308 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split)
|
309 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
|
|
|
|
310 |
raw_baselines = dk_raw
|
311 |
column_names = dk_columns
|
312 |
elif sim_slate_var1 == 'Secondary Slate':
|
313 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split)
|
314 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
|
|
|
|
315 |
raw_baselines = dk_secondary
|
316 |
column_names = dk_columns
|
317 |
|
@@ -319,11 +344,15 @@ with tab1:
|
|
319 |
if sim_slate_var1 == 'Main Slate':
|
320 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split)
|
321 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
|
|
|
|
322 |
raw_baselines = fd_raw
|
323 |
column_names = fd_columns
|
324 |
elif sim_slate_var1 == 'Secondary Slate':
|
325 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split)
|
326 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
|
|
|
|
327 |
raw_baselines = fd_secondary
|
328 |
column_names = fd_columns
|
329 |
|
@@ -665,12 +694,14 @@ with tab2:
|
|
665 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split_var)
|
666 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
667 |
|
|
|
668 |
raw_baselines = dk_raw
|
669 |
column_names = dk_columns
|
670 |
elif slate_var1 == 'Secondary Slate':
|
671 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var)
|
672 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
673 |
|
|
|
674 |
raw_baselines = dk_secondary
|
675 |
column_names = dk_columns
|
676 |
|
@@ -694,12 +725,14 @@ with tab2:
|
|
694 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split_var)
|
695 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
696 |
|
|
|
697 |
raw_baselines = fd_raw
|
698 |
column_names = fd_columns
|
699 |
elif slate_var1 == 'Secondary Slate':
|
700 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var)
|
701 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
702 |
-
|
|
|
703 |
raw_baselines = fd_secondary
|
704 |
column_names = fd_columns
|
705 |
|
|
|
173 |
|
174 |
return dk_raw, fd_raw, dk_secondary, fd_secondary
|
175 |
|
176 |
+
@st.cache_data
|
177 |
+
def validate_lineup_players(df, valid_players, player_columns):
    """
    Keep only the lineups whose every player slot holds a valid player.

    Args:
        df: DataFrame containing lineups; rows are kept or dropped whole.
        valid_players: Iterable (the callers pass a set) of valid player
            names. A Series or dict is read as a plain collection of its
            values / keys and is never aligned against df.
        player_columns: List of columns containing player names. A single
            column name given as a string is also accepted.

    Returns:
        DataFrame with only valid lineups: the rows of df in which every
        value under player_columns is found in valid_players. Index labels
        and all columns of the surviving rows are retained.

    Raises:
        TypeError: If valid_players is a bare string rather than a
            collection of names.
        KeyError: If a name in player_columns is not a column of df.
    """
    if isinstance(valid_players, str):
        # list("some name") would silently turn into a list of characters.
        raise TypeError(
            "valid_players must be a collection of player names, not a single string"
        )

    if isinstance(player_columns, str):
        player_columns = [player_columns]
    else:
        player_columns = list(player_columns)

    # Materialise once as a list: DataFrame.isin aligns a Series on its index
    # and reads a dict as a per-column mapping, neither of which is wanted.
    allowed = list(valid_players)

    # One vectorised membership test over all slot columns instead of a
    # Python-level lambda applied column by column.
    valid_rows = df[player_columns].isin(allowed).all(axis=1)
    return df[valid_rows]
|
191 |
+
|
192 |
@st.cache_data
|
193 |
def convert_df(array):
|
194 |
array = pd.DataFrame(array, columns=column_names)
|
|
|
261 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
262 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
263 |
|
264 |
+
DK_seed = validate_lineup_players(DK_seed, set(dk_raw.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
265 |
+
FD_seed = validate_lineup_players(FD_seed, set(fd_raw.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
266 |
+
DK_secondary = validate_lineup_players(DK_secondary, set(dk_secondary.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
267 |
+
FD_secondary = validate_lineup_players(FD_secondary, set(fd_secondary.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
268 |
+
|
269 |
sim_slate_var1 = st.radio("Which data are you loading?", ('Main Slate', 'Secondary Slate'), key='sim_slate_var1')
|
270 |
sim_site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'), key='sim_site_var1')
|
271 |
|
|
|
328 |
if sim_slate_var1 == 'Main Slate':
|
329 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split)
|
330 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
331 |
+
|
332 |
+
st.session_state.dk_raw = validate_lineup_players(dk_raw, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
333 |
raw_baselines = dk_raw
|
334 |
column_names = dk_columns
|
335 |
elif sim_slate_var1 == 'Secondary Slate':
|
336 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split)
|
337 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
338 |
+
|
339 |
+
st.session_state.dk_secondary = validate_lineup_players(dk_secondary, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
340 |
raw_baselines = dk_secondary
|
341 |
column_names = dk_columns
|
342 |
|
|
|
344 |
if sim_slate_var1 == 'Main Slate':
|
345 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split)
|
346 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
347 |
+
|
348 |
+
st.session_state.fd_raw = validate_lineup_players(fd_raw, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
349 |
raw_baselines = fd_raw
|
350 |
column_names = fd_columns
|
351 |
elif sim_slate_var1 == 'Secondary Slate':
|
352 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split)
|
353 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
354 |
+
|
355 |
+
st.session_state.fd_secondary = validate_lineup_players(fd_secondary, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
356 |
raw_baselines = fd_secondary
|
357 |
column_names = fd_columns
|
358 |
|
|
|
694 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split_var)
|
695 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
696 |
|
697 |
+
st.session_state.dk_raw = validate_lineup_players(dk_raw, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
698 |
raw_baselines = dk_raw
|
699 |
column_names = dk_columns
|
700 |
elif slate_var1 == 'Secondary Slate':
|
701 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var)
|
702 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
703 |
|
704 |
+
st.session_state.dk_secondary = validate_lineup_players(dk_secondary, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
705 |
raw_baselines = dk_secondary
|
706 |
column_names = dk_columns
|
707 |
|
|
|
725 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split_var)
|
726 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
727 |
|
728 |
+
st.session_state.fd_raw = validate_lineup_players(fd_raw, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
729 |
raw_baselines = fd_raw
|
730 |
column_names = fd_columns
|
731 |
elif slate_var1 == 'Secondary Slate':
|
732 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var)
|
733 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
734 |
+
|
735 |
+
st.session_state.fd_secondary = validate_lineup_players(fd_secondary, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
736 |
raw_baselines = fd_secondary
|
737 |
column_names = fd_columns
|
738 |
|