James McCool committed on
Commit
6bcbd26
·
1 Parent(s): 9c5865c

Add player validation function for lineups

Browse files

Introduced a new function, validate_lineup_players, which checks that every player in the specified position columns exists within a set of valid players. The function is applied to both DraftKings and FanDuel lineups, improving data integrity by dropping any lineup (row) that contains a player not in the valid set before further processing.

Files changed (1) hide show
  1. app.py +34 -1
app.py CHANGED
@@ -173,6 +173,22 @@ def init_baselines():
173
 
174
  return dk_raw, fd_raw, dk_secondary, fd_secondary
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  @st.cache_data
177
  def convert_df(array):
178
  array = pd.DataFrame(array, columns=column_names)
@@ -245,6 +261,11 @@ with tab1:
245
  dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
246
  fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
247
 
 
 
 
 
 
248
  sim_slate_var1 = st.radio("Which data are you loading?", ('Main Slate', 'Secondary Slate'), key='sim_slate_var1')
249
  sim_site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'), key='sim_site_var1')
250
 
@@ -307,11 +328,15 @@ with tab1:
307
  if sim_slate_var1 == 'Main Slate':
308
  st.session_state.working_seed = init_DK_seed_frames(sharp_split)
309
  dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
 
 
310
  raw_baselines = dk_raw
311
  column_names = dk_columns
312
  elif sim_slate_var1 == 'Secondary Slate':
313
  st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split)
314
  dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
 
 
315
  raw_baselines = dk_secondary
316
  column_names = dk_columns
317
 
@@ -319,11 +344,15 @@ with tab1:
319
  if sim_slate_var1 == 'Main Slate':
320
  st.session_state.working_seed = init_FD_seed_frames(sharp_split)
321
  fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
 
 
322
  raw_baselines = fd_raw
323
  column_names = fd_columns
324
  elif sim_slate_var1 == 'Secondary Slate':
325
  st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split)
326
  fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
 
 
327
  raw_baselines = fd_secondary
328
  column_names = fd_columns
329
 
@@ -665,12 +694,14 @@ with tab2:
665
  st.session_state.working_seed = init_DK_seed_frames(sharp_split_var)
666
  dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
667
 
 
668
  raw_baselines = dk_raw
669
  column_names = dk_columns
670
  elif slate_var1 == 'Secondary Slate':
671
  st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var)
672
  dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
673
 
 
674
  raw_baselines = dk_secondary
675
  column_names = dk_columns
676
 
@@ -694,12 +725,14 @@ with tab2:
694
  st.session_state.working_seed = init_FD_seed_frames(sharp_split_var)
695
  fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
696
 
 
697
  raw_baselines = fd_raw
698
  column_names = fd_columns
699
  elif slate_var1 == 'Secondary Slate':
700
  st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var)
701
  fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
702
-
 
703
  raw_baselines = fd_secondary
704
  column_names = fd_columns
705
 
 
173
 
174
  return dk_raw, fd_raw, dk_secondary, fd_secondary
175
 
176
+ @st.cache_data
177
+ def validate_lineup_players(df, valid_players, player_columns):
178
+ """
179
+ Validates that all players in specified columns exist in valid_players set
180
+
181
+ Args:
182
+ df: DataFrame containing lineups
183
+ valid_players: Set of valid player names
184
+ player_columns: List of columns containing player names
185
+
186
+ Returns:
187
+ DataFrame with only valid lineups
188
+ """
189
+ valid_rows = df[player_columns].apply(lambda x: x.isin(valid_players)).all(axis=1)
190
+ return df[valid_rows]
191
+
192
  @st.cache_data
193
  def convert_df(array):
194
  array = pd.DataFrame(array, columns=column_names)
 
261
  dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
262
  fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
263
 
264
+ DK_seed = validate_lineup_players(DK_seed, set(dk_raw.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
265
+ FD_seed = validate_lineup_players(FD_seed, set(fd_raw.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
266
+ DK_secondary = validate_lineup_players(DK_secondary, set(dk_secondary.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
267
+ FD_secondary = validate_lineup_players(FD_secondary, set(fd_secondary.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
268
+
269
  sim_slate_var1 = st.radio("Which data are you loading?", ('Main Slate', 'Secondary Slate'), key='sim_slate_var1')
270
  sim_site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'), key='sim_site_var1')
271
 
 
328
  if sim_slate_var1 == 'Main Slate':
329
  st.session_state.working_seed = init_DK_seed_frames(sharp_split)
330
  dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
331
+
332
+ st.session_state.dk_raw = validate_lineup_players(dk_raw, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
333
  raw_baselines = dk_raw
334
  column_names = dk_columns
335
  elif sim_slate_var1 == 'Secondary Slate':
336
  st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split)
337
  dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
338
+
339
+ st.session_state.dk_secondary = validate_lineup_players(dk_secondary, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
340
  raw_baselines = dk_secondary
341
  column_names = dk_columns
342
 
 
344
  if sim_slate_var1 == 'Main Slate':
345
  st.session_state.working_seed = init_FD_seed_frames(sharp_split)
346
  fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
347
+
348
+ st.session_state.fd_raw = validate_lineup_players(fd_raw, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
349
  raw_baselines = fd_raw
350
  column_names = fd_columns
351
  elif sim_slate_var1 == 'Secondary Slate':
352
  st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split)
353
  fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
354
+
355
+ st.session_state.fd_secondary = validate_lineup_players(fd_secondary, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
356
  raw_baselines = fd_secondary
357
  column_names = fd_columns
358
 
 
694
  st.session_state.working_seed = init_DK_seed_frames(sharp_split_var)
695
  dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
696
 
697
+ st.session_state.dk_raw = validate_lineup_players(dk_raw, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
698
  raw_baselines = dk_raw
699
  column_names = dk_columns
700
  elif slate_var1 == 'Secondary Slate':
701
  st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var)
702
  dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
703
 
704
+ st.session_state.dk_secondary = validate_lineup_players(dk_secondary, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
705
  raw_baselines = dk_secondary
706
  column_names = dk_columns
707
 
 
725
  st.session_state.working_seed = init_FD_seed_frames(sharp_split_var)
726
  fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
727
 
728
+ st.session_state.fd_raw = validate_lineup_players(fd_raw, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
729
  raw_baselines = fd_raw
730
  column_names = fd_columns
731
  elif slate_var1 == 'Secondary Slate':
732
  st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var)
733
  fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
734
+
735
+ st.session_state.fd_secondary = validate_lineup_players(fd_secondary, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
736
  raw_baselines = fd_secondary
737
  column_names = fd_columns
738