James McCool committed
Commit · b01e348 · Parent(s): 1bb122a

Refactor app.py to replace predict_dupes calls with reassess_edge for both working and export frames, enhancing the accuracy of edge assessments by utilizing the updated reassess_edge function in reassess_edge.py.

Files changed:
- app.py (+4, -4)
- global_func/reassess_edge.py (+46, -123)
app.py CHANGED

@@ -1508,8 +1508,8 @@ with tab2:
         st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
         st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')

-        st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
-
+        # st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
+        st.session_state['working_frame'] = reassess_edge(st.session_state['working_frame'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
         st.session_state['export_merge'] = st.session_state['working_frame'].copy()
     elif exp_submitted:
         st.session_state['settings_base'] = False
@@ -1606,8 +1606,8 @@ with tab2:
         st.session_state['export_base']['median'] = st.session_state['export_base']['median'].astype('float32')
         st.session_state['export_base']['salary'] = st.session_state['export_base']['salary'].astype('uint16')

-        st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
-
+        # st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
+        st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
         st.session_state['export_merge'] = st.session_state['export_base'].copy()

     with st.container():
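Both hunks make the same swap: the predict_dupes call is kept as a comment and replaced with reassess_edge, which additionally receives the untouched st.session_state['base_frame']. The base frame matters because metrics like Finish_percentile are relative to the pool of lineups they are computed over, so re-scoring a small post-exposure_spread frame in isolation skews every percentile (the problem described in the comments removed from reassess_edge.py below). A minimal pandas illustration of that pool-dependence, using a plain rank-based percentile as a stand-in rather than the app's actual predict_dupes model:

    import pandas as pd

    # Ten lineups: 'median' is a projected score. Compare a percentile computed
    # within the full pool against one computed within a weaker five-lineup subset.
    pool = pd.DataFrame({'median': [110, 112, 115, 118, 120, 122, 125, 128, 130, 133]})
    pool['finish_pct_full'] = pool['median'].rank(pct=True, ascending=False)

    subset = pool.iloc[:5].copy()
    subset['finish_pct_alone'] = subset['median'].rank(pct=True, ascending=False)

    # The 120-point lineup is mid-pack in the full pool (0.6) but looks like the
    # best lineup (0.2) when ranked only against its weaker subset-mates.
    print(subset[['median', 'finish_pct_full', 'finish_pct_alone']])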
global_func/reassess_edge.py CHANGED

@@ -1,132 +1,55 @@
-# Rerunning the predict_dupes.py function on a small portfolio after running exposure_spread.py breaks the metrics
-# This is because the predict_dupes.py function is exclusive of the lineups it takes in, and is meant to give edge around the median expectation of those lineups
-# So what we need to do instead is find the differences across the original set and the post-exposure_spread.py set and calculate new metrics around the diff
-# Need to find the diff in salary, median, and Own to calculate new Dupes, finish_percentile, Win%, Edge, Weighted Own, and Geomean
-# Then at the end run the Diversity function to set a new column for Diversity
-# This way we only make the changes to the metrics where we have a difference in salary, median, and Own and leave the rest of the lineups alone
-
 import pandas as pd
-import math
-import streamlit as st
-
-def reassess_dupes(row: pd.Series, salary_max: int) -> float:
-    # Convert to signed integers to avoid uint16 wrap-around
-    salary = int(row['salary'])
-    salary_diff = int(row['salary_diff'])
-    own_diff = float(row['own_diff'])
-    dupes = int(row['Dupes'])
-
-    # Calculate current distance from threshold
-    threshold = salary_max - 500
-    distance_from_threshold = salary - threshold
-
-    # Check if salary crossed the threshold due to salary_diff
-    original_salary = salary - salary_diff  # What the salary was before the change
-
-    if original_salary >= threshold and salary < threshold:
-        # Salary crossed from above to below threshold - apply dramatic reduction
-        reduction_factor = 2 ** (abs(distance_from_threshold) / 100)
-        adjusted_dupes = dupes / reduction_factor
-    elif distance_from_threshold < 0:
-        # Already below threshold - apply reduction
-        reduction_factor = 2 ** (abs(distance_from_threshold) / 100)
-        adjusted_dupes = dupes / reduction_factor
-    else:
-        adjusted_dupes = dupes + (
-            (distance_from_threshold / 100) * (2 + (salary_diff / 200)) *
-            (1 if salary_diff >= 0 else -1)
-        )
-    else:
-        adjusted_dupes = dupes
-
-    # Add ownership component
-    final_dupes = max(0, adjusted_dupes + (own_diff / 10))
-
-    return math.ceil(final_dupes)
-
-def reassess_lineup_edge(row: pd.Series, Contest_Size: int, prev_finish_percentile: float, prev_dupes: int) -> float:
-    fp_rate = row['Finish_percentile'] / prev_finish_percentile
-    if prev_dupes > 0:
-        dupe_rate = row['Dupes'] / prev_dupes
-    else:
-        dupe_rate = min(row['Dupes'], 3)
-    lineup_edge = (fp_rate * dupe_rate)
-
-    return (row['Lineup Edge'] + (row['Lineup Edge'] * lineup_edge)) / 2
-
-def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame, maps_dict: dict, Contest_Size: int, salary_max: int) -> pd.DataFrame:
-    orig_df = original_frame.copy()
-    orig_df = orig_df.reset_index(drop=True)
-    refactored_df = refactored_frame.copy()
-    refactored_df = refactored_df.reset_index(drop=True)
-
-    refactored_df['salary_diff'] = (refactored_df['salary'] - orig_df['salary']).astype('int16')
-    refactored_df['median_diff'] = (refactored_df['median'] - orig_df['median']).astype('float32')
-    refactored_df['own_diff'] = (refactored_df['Own'] - orig_df['Own']).astype('float32')
-
-    change_mask = refactored_df[refactored_df['median_diff'] != 0]
-
-    salary_col_index = refactored_df.columns.get_loc('salary')
-    num_players = salary_col_index
-    own_columns = [f'player_{i}_own' for i in range(1, num_players)]
-
-    for col in range(num_players):
-        refactored_df[f'player_{col}_own'] = refactored_df.iloc[:, col].map(maps_dict['own_map']).astype('float32') / 100
-
-    for lineups in change_mask.index:
-        prev_finish_percentile = refactored_df.loc[lineups, 'Finish_percentile']
-        prev_dupes = refactored_df.loc[lineups, 'Dupes']
-        refactored_df.loc[lineups, 'Dupes'] = reassess_dupes(refactored_df.loc[lineups, :], salary_max)
-        refactored_df.loc[lineups, 'Finish_percentile'] = max(reassess_finish_percentile(refactored_df.loc[lineups, :]), .005 + refactored_df.loc[lineups, 'Win%'])
-        refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
-        refactored_df.loc[lineups, 'Lineup Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size, prev_finish_percentile, prev_dupes)
-        refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_single_row(refactored_df.loc[lineups, own_columns])
-        refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(), 1 / len(own_columns))
-
-    refactored_df = refactored_df.drop(columns=['salary_diff', 'median_diff', 'own_diff', 'player_0_own'] + own_columns)
-
-    return refactored_df
+from global_func.predict_dupes import predict_dupes
+
+def reassess_edge(modified_frame: pd.DataFrame, base_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str) -> pd.DataFrame:
+    """
+    Reassess edge by re-inserting modified rows into the base frame, running predict_dupes,
+    and then extracting the updated modified rows.
+
+    Args:
+        modified_frame: DataFrame with rows that were modified by exposure_spread
+        base_frame: Original base frame (base_frame for Portfolio, original export_base for Export)
+        maps_dict: Dictionary containing player mappings
+        site_var: Site variable (Draftkings/Fanduel)
+        type_var: Type variable (Classic/Showdown)
+        Contest_Size: Contest size for calculations
+        strength_var: Strength variable (Weak/Average/Sharp)
+        sport_var: Sport variable
+
+    Returns:
+        DataFrame: Updated modified_frame with recalculated metrics
+    """
+    # Create a copy of the base frame
+    combined_frame = base_frame.copy()
+
+    # Get the player columns (excluding salary, median, Own)
+    player_columns = [col for col in modified_frame.columns if col not in ['salary', 'median', 'Own']]
+
+    # For each modified row, find and replace the corresponding row in the combined frame
+    for idx, modified_row in modified_frame.iterrows():
+        # Find the row in combined_frame that matches the player composition
+        for combined_idx, combined_row in combined_frame.iterrows():
+            if all(modified_row[col] == combined_row[col] for col in player_columns):
+                # Replace the row with the modified version
+                combined_frame.loc[combined_idx] = modified_row
+                break
+
+    # Run predict_dupes on the combined frame
+    updated_combined_frame = predict_dupes(combined_frame, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var)
+
+    # Extract the updated modified rows
+    updated_modified_rows = []
+    for idx, modified_row in modified_frame.iterrows():
+        for combined_idx, combined_row in updated_combined_frame.iterrows():
+            if all(modified_row[col] == combined_row[col] for col in player_columns):
+                updated_modified_rows.append(combined_row)
+                break
+
+    # Convert back to DataFrame
+    if updated_modified_rows:
+        result_frame = pd.DataFrame(updated_modified_rows)
+        result_frame = result_frame.reset_index(drop=True)
+        return result_frame
+    else:
+        # If no matches found, return the original modified_frame
+        return modified_frame
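One design note on the new implementation: the two matching passes compare every modified row against every base row with nested iterrows, which is O(n·m) per pass. The same replace-then-extract behavior can be expressed with a single precomputed key lookup. A rough sketch, with the caveats that _lineup_key and replace_matching_rows are hypothetical helpers (not in this repo) and that it assumes each lineup's player combination stringifies cleanly and appears at most once in base_frame:

    import pandas as pd

    def _lineup_key(df: pd.DataFrame, player_columns: list) -> pd.Series:
        # Build one hashable key per row by joining the player columns as strings.
        return df[player_columns].astype(str).agg('|'.join, axis=1)

    def replace_matching_rows(base_frame: pd.DataFrame, modified_frame: pd.DataFrame,
                              player_columns: list) -> pd.DataFrame:
        # Overwrite each base row whose player composition matches a modified row.
        combined = base_frame.copy()
        key_to_pos = {key: pos for pos, key in enumerate(_lineup_key(combined, player_columns))}
        for mod_pos, key in enumerate(_lineup_key(modified_frame, player_columns)):
            if key in key_to_pos:
                combined.iloc[key_to_pos[key]] = modified_frame.iloc[mod_pos]
        return combined

The extraction pass after predict_dupes could reuse the same key-to-position map against updated_combined_frame rather than rescanning with a second nested loop.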