Spaces:
Running
Running
James McCool
committed on
Commit
·
297c202
1
Parent(s):
6bcbd26
Enhance seed frame initialization with player validation
Browse files
Updated the seed frame initialization functions for DraftKings and FanDuel to include validation against a set of valid players. This ensures that only players present in the Range of Outcomes collection are considered, improving data integrity. Additionally, refactored the name mapping to use `apply` for better performance and clarity.
app.py
CHANGED
@@ -59,6 +59,11 @@ def init_DK_seed_frames(load_size):
|
|
59 |
cursor = collection.find()
|
60 |
raw_data = pd.DataFrame(list(cursor))
|
61 |
names_dict = dict(zip(raw_data['key'], raw_data['value']))
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
collection = db["DK_NBA_seed_frame"]
|
64 |
cursor = collection.find().limit(load_size)
|
@@ -66,9 +71,13 @@ def init_DK_seed_frames(load_size):
|
|
66 |
raw_display = pd.DataFrame(list(cursor))
|
67 |
raw_display = raw_display[['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
|
68 |
dict_columns = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX']
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
72 |
raw_display = raw_display.dropna()
|
73 |
DK_seed = raw_display.to_numpy()
|
74 |
|
@@ -81,6 +90,11 @@ def init_DK_secondary_seed_frames(load_size):
|
|
81 |
cursor = collection.find()
|
82 |
raw_data = pd.DataFrame(list(cursor))
|
83 |
names_dict = dict(zip(raw_data['key'], raw_data['value']))
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
collection = db["DK_NBA_Secondary_seed_frame"]
|
86 |
cursor = collection.find().limit(load_size)
|
@@ -88,9 +102,13 @@ def init_DK_secondary_seed_frames(load_size):
|
|
88 |
raw_display = pd.DataFrame(list(cursor))
|
89 |
raw_display = raw_display[['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
|
90 |
dict_columns = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX']
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
94 |
raw_display = raw_display.dropna()
|
95 |
DK_seed = raw_display.to_numpy()
|
96 |
|
@@ -103,6 +121,11 @@ def init_FD_seed_frames(load_size):
|
|
103 |
cursor = collection.find()
|
104 |
raw_data = pd.DataFrame(list(cursor))
|
105 |
names_dict = dict(zip(raw_data['key'], raw_data['value']))
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
collection = db["FD_NBA_seed_frame"]
|
108 |
cursor = collection.find().limit(load_size)
|
@@ -110,9 +133,13 @@ def init_FD_seed_frames(load_size):
|
|
110 |
raw_display = pd.DataFrame(list(cursor))
|
111 |
raw_display = raw_display[['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
|
112 |
dict_columns = ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1']
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
116 |
raw_display = raw_display.dropna()
|
117 |
FD_seed = raw_display.to_numpy()
|
118 |
|
@@ -125,6 +152,11 @@ def init_FD_secondary_seed_frames(load_size):
|
|
125 |
cursor = collection.find()
|
126 |
raw_data = pd.DataFrame(list(cursor))
|
127 |
names_dict = dict(zip(raw_data['key'], raw_data['value']))
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
collection = db["FD_NBA_Secondary_seed_frame"]
|
130 |
cursor = collection.find().limit(load_size)
|
@@ -132,9 +164,13 @@ def init_FD_secondary_seed_frames(load_size):
|
|
132 |
raw_display = pd.DataFrame(list(cursor))
|
133 |
raw_display = raw_display[['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
|
134 |
dict_columns = ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1']
|
135 |
-
|
136 |
-
|
137 |
-
|
|
|
|
|
|
|
|
|
138 |
raw_display = raw_display.dropna()
|
139 |
FD_seed = raw_display.to_numpy()
|
140 |
|
@@ -328,15 +364,11 @@ with tab1:
|
|
328 |
if sim_slate_var1 == 'Main Slate':
|
329 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split)
|
330 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
331 |
-
|
332 |
-
st.session_state.dk_raw = validate_lineup_players(dk_raw, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
333 |
raw_baselines = dk_raw
|
334 |
column_names = dk_columns
|
335 |
elif sim_slate_var1 == 'Secondary Slate':
|
336 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split)
|
337 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
338 |
-
|
339 |
-
st.session_state.dk_secondary = validate_lineup_players(dk_secondary, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
340 |
raw_baselines = dk_secondary
|
341 |
column_names = dk_columns
|
342 |
|
@@ -344,15 +376,11 @@ with tab1:
|
|
344 |
if sim_slate_var1 == 'Main Slate':
|
345 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split)
|
346 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
347 |
-
|
348 |
-
st.session_state.fd_raw = validate_lineup_players(fd_raw, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
349 |
raw_baselines = fd_raw
|
350 |
column_names = fd_columns
|
351 |
elif sim_slate_var1 == 'Secondary Slate':
|
352 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split)
|
353 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
354 |
-
|
355 |
-
st.session_state.fd_secondary = validate_lineup_players(fd_secondary, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
356 |
raw_baselines = fd_secondary
|
357 |
column_names = fd_columns
|
358 |
|
@@ -694,14 +722,12 @@ with tab2:
|
|
694 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split_var)
|
695 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
696 |
|
697 |
-
st.session_state.dk_raw = validate_lineup_players(dk_raw, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
698 |
raw_baselines = dk_raw
|
699 |
column_names = dk_columns
|
700 |
elif slate_var1 == 'Secondary Slate':
|
701 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var)
|
702 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
703 |
|
704 |
-
st.session_state.dk_secondary = validate_lineup_players(dk_secondary, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
705 |
raw_baselines = dk_secondary
|
706 |
column_names = dk_columns
|
707 |
|
@@ -725,14 +751,12 @@ with tab2:
|
|
725 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split_var)
|
726 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
727 |
|
728 |
-
st.session_state.fd_raw = validate_lineup_players(fd_raw, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
729 |
raw_baselines = fd_raw
|
730 |
column_names = fd_columns
|
731 |
elif slate_var1 == 'Secondary Slate':
|
732 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var)
|
733 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
734 |
-
|
735 |
-
st.session_state.fd_secondary = validate_lineup_players(fd_secondary, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
736 |
raw_baselines = fd_secondary
|
737 |
column_names = fd_columns
|
738 |
|
|
|
59 |
cursor = collection.find()
|
60 |
raw_data = pd.DataFrame(list(cursor))
|
61 |
names_dict = dict(zip(raw_data['key'], raw_data['value']))
|
62 |
+
|
63 |
+
# Get the valid players from the Range of Outcomes collection
|
64 |
+
collection = db["Player_Range_Of_Outcomes"]
|
65 |
+
cursor = collection.find({"site": "Draftkings", "slate": "Main Slate"})
|
66 |
+
valid_players = set(pd.DataFrame(list(cursor))['Name'].unique())
|
67 |
|
68 |
collection = db["DK_NBA_seed_frame"]
|
69 |
cursor = collection.find().limit(load_size)
|
|
|
71 |
raw_display = pd.DataFrame(list(cursor))
|
72 |
raw_display = raw_display[['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
|
73 |
dict_columns = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX']
|
74 |
+
# Map names
|
75 |
+
raw_display[dict_columns] = raw_display[dict_columns].apply(lambda x: x.map(names_dict))
|
76 |
+
|
77 |
+
# Validate lineups against valid players
|
78 |
+
raw_display = validate_lineup_players(raw_display, valid_players, dict_columns)
|
79 |
+
|
80 |
+
# Remove any remaining NaN values
|
81 |
raw_display = raw_display.dropna()
|
82 |
DK_seed = raw_display.to_numpy()
|
83 |
|
|
|
90 |
cursor = collection.find()
|
91 |
raw_data = pd.DataFrame(list(cursor))
|
92 |
names_dict = dict(zip(raw_data['key'], raw_data['value']))
|
93 |
+
|
94 |
+
# Get the valid players from the Range of Outcomes collection
|
95 |
+
collection = db["Player_Range_Of_Outcomes"]
|
96 |
+
cursor = collection.find({"site": "Draftkings", "slate": "Secondary Slate"})
|
97 |
+
valid_players = set(pd.DataFrame(list(cursor))['Name'].unique())
|
98 |
|
99 |
collection = db["DK_NBA_Secondary_seed_frame"]
|
100 |
cursor = collection.find().limit(load_size)
|
|
|
102 |
raw_display = pd.DataFrame(list(cursor))
|
103 |
raw_display = raw_display[['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
|
104 |
dict_columns = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX']
|
105 |
+
# Map names
|
106 |
+
raw_display[dict_columns] = raw_display[dict_columns].apply(lambda x: x.map(names_dict))
|
107 |
+
|
108 |
+
# Validate lineups against valid players
|
109 |
+
raw_display = validate_lineup_players(raw_display, valid_players, dict_columns)
|
110 |
+
|
111 |
+
# Remove any remaining NaN values
|
112 |
raw_display = raw_display.dropna()
|
113 |
DK_seed = raw_display.to_numpy()
|
114 |
|
|
|
121 |
cursor = collection.find()
|
122 |
raw_data = pd.DataFrame(list(cursor))
|
123 |
names_dict = dict(zip(raw_data['key'], raw_data['value']))
|
124 |
+
|
125 |
+
# Get the valid players from the Range of Outcomes collection
|
126 |
+
collection = db["Player_Range_Of_Outcomes"]
|
127 |
+
cursor = collection.find({"site": "Fanduel", "slate": "Main Slate"})
|
128 |
+
valid_players = set(pd.DataFrame(list(cursor))['Name'].unique())
|
129 |
|
130 |
collection = db["FD_NBA_seed_frame"]
|
131 |
cursor = collection.find().limit(load_size)
|
|
|
133 |
raw_display = pd.DataFrame(list(cursor))
|
134 |
raw_display = raw_display[['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
|
135 |
dict_columns = ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1']
|
136 |
+
# Map names
|
137 |
+
raw_display[dict_columns] = raw_display[dict_columns].apply(lambda x: x.map(names_dict))
|
138 |
+
|
139 |
+
# Validate lineups against valid players
|
140 |
+
raw_display = validate_lineup_players(raw_display, valid_players, dict_columns)
|
141 |
+
|
142 |
+
# Remove any remaining NaN values
|
143 |
raw_display = raw_display.dropna()
|
144 |
FD_seed = raw_display.to_numpy()
|
145 |
|
|
|
152 |
cursor = collection.find()
|
153 |
raw_data = pd.DataFrame(list(cursor))
|
154 |
names_dict = dict(zip(raw_data['key'], raw_data['value']))
|
155 |
+
|
156 |
+
# Get the valid players from the Range of Outcomes collection
|
157 |
+
collection = db["Player_Range_Of_Outcomes"]
|
158 |
+
cursor = collection.find({"site": "Fanduel", "slate": "Secondary Slate"})
|
159 |
+
valid_players = set(pd.DataFrame(list(cursor))['Name'].unique())
|
160 |
|
161 |
collection = db["FD_NBA_Secondary_seed_frame"]
|
162 |
cursor = collection.find().limit(load_size)
|
|
|
164 |
raw_display = pd.DataFrame(list(cursor))
|
165 |
raw_display = raw_display[['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1', 'salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']]
|
166 |
dict_columns = ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1']
|
167 |
+
# Map names
|
168 |
+
raw_display[dict_columns] = raw_display[dict_columns].apply(lambda x: x.map(names_dict))
|
169 |
+
|
170 |
+
# Validate lineups against valid players
|
171 |
+
raw_display = validate_lineup_players(raw_display, valid_players, dict_columns)
|
172 |
+
|
173 |
+
# Remove any remaining NaN values
|
174 |
raw_display = raw_display.dropna()
|
175 |
FD_seed = raw_display.to_numpy()
|
176 |
|
|
|
364 |
if sim_slate_var1 == 'Main Slate':
|
365 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split)
|
366 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
|
|
|
|
367 |
raw_baselines = dk_raw
|
368 |
column_names = dk_columns
|
369 |
elif sim_slate_var1 == 'Secondary Slate':
|
370 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split)
|
371 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
|
|
|
|
372 |
raw_baselines = dk_secondary
|
373 |
column_names = dk_columns
|
374 |
|
|
|
376 |
if sim_slate_var1 == 'Main Slate':
|
377 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split)
|
378 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
|
|
|
|
379 |
raw_baselines = fd_raw
|
380 |
column_names = fd_columns
|
381 |
elif sim_slate_var1 == 'Secondary Slate':
|
382 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split)
|
383 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
|
|
|
|
384 |
raw_baselines = fd_secondary
|
385 |
column_names = fd_columns
|
386 |
|
|
|
722 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split_var)
|
723 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
724 |
|
|
|
725 |
raw_baselines = dk_raw
|
726 |
column_names = dk_columns
|
727 |
elif slate_var1 == 'Secondary Slate':
|
728 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var)
|
729 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
730 |
|
|
|
731 |
raw_baselines = dk_secondary
|
732 |
column_names = dk_columns
|
733 |
|
|
|
751 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split_var)
|
752 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
753 |
|
|
|
754 |
raw_baselines = fd_raw
|
755 |
column_names = fd_columns
|
756 |
elif slate_var1 == 'Secondary Slate':
|
757 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var)
|
758 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
759 |
+
|
|
|
760 |
raw_baselines = fd_secondary
|
761 |
column_names = fd_columns
|
762 |
|