James McCool commited on
Commit
b01e348
·
1 Parent(s): 1bb122a

Refactor app.py to replace predict_dupes calls with reassess_edge for both working and export frames, enhancing the accuracy of edge assessments by utilizing the updated reassess_edge function in reassess_edge.py.

Browse files
Files changed (2) hide show
  1. app.py +4 -4
  2. global_func/reassess_edge.py +46 -123
app.py CHANGED
@@ -1508,8 +1508,8 @@ with tab2:
1508
  st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
1509
  st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
1510
 
1511
- st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1512
- # st.session_state['working_frame'] = reassess_edge(st.session_state['working_frame'], prior_frame, st.session_state['map_dict'], Contest_Size, salary_max)
1513
  st.session_state['export_merge'] = st.session_state['working_frame'].copy()
1514
  elif exp_submitted:
1515
  st.session_state['settings_base'] = False
@@ -1606,8 +1606,8 @@ with tab2:
1606
  st.session_state['export_base']['median'] = st.session_state['export_base']['median'].astype('float32')
1607
  st.session_state['export_base']['salary'] = st.session_state['export_base']['salary'].astype('uint16')
1608
 
1609
- st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1610
- # st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], prior_frame, st.session_state['map_dict'], Contest_Size, salary_max)
1611
  st.session_state['export_merge'] = st.session_state['export_base'].copy()
1612
 
1613
  with st.container():
 
1508
  st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
1509
  st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
1510
 
1511
+ # st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1512
+ st.session_state['working_frame'] = reassess_edge(st.session_state['working_frame'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1513
  st.session_state['export_merge'] = st.session_state['working_frame'].copy()
1514
  elif exp_submitted:
1515
  st.session_state['settings_base'] = False
 
1606
  st.session_state['export_base']['median'] = st.session_state['export_base']['median'].astype('float32')
1607
  st.session_state['export_base']['salary'] = st.session_state['export_base']['salary'].astype('uint16')
1608
 
1609
+ # st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1610
+ st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1611
  st.session_state['export_merge'] = st.session_state['export_base'].copy()
1612
 
1613
  with st.container():
global_func/reassess_edge.py CHANGED
@@ -1,132 +1,55 @@
1
- # Rerunning the predict_dupes.py function on a small portfolio after running exposure_spread.py breaks the metrics
2
- # This is because the predict_dupes.py function is exclusive of the lineups it takes in, and is meant to give edge around the median expectation of those lineups
3
- # So what we need to do instead is find the differences across the original set and the post-exposure_spread.py set and calculate new metrics around the diff
4
- # Need to find the diff in salary, median, and Own to calculate new Dupes, finish_percentile, Win%, Edge, Weighted Own, and Geomean
5
- # Then at the end run the Diversity function to set a new column for Diversity
6
- # This way we only make the changes to the metrics where we have a difference in salary, median, and Own and leave the rest of the lineups alone
7
-
8
  import pandas as pd
9
- import numpy as np
10
- import math
11
- import streamlit as st
12
 
13
- def calculate_weighted_ownership_single_row(row_ownerships):
14
  """
15
- Calculate weighted ownership for a single row of ownership values.
 
16
 
17
  Args:
18
- row_ownerships: Series containing ownership values in percentage form
 
 
 
 
 
 
 
19
 
20
  Returns:
21
- float: Calculated weighted ownership value
22
  """
23
-
24
- ownership_values = pd.to_numeric(row_ownerships.values, errors='coerce')
25
- ownership_values = np.where(np.isnan(ownership_values), 0, ownership_values) / 100
26
-
27
- # Calculate mean
28
- row_mean = np.mean(ownership_values)
29
-
30
- # Calculate average of each value with the overall mean
31
- value_means = (ownership_values + row_mean) / 2
32
-
33
- # Take average of all those means
34
- avg_of_means = np.mean(value_means)
35
-
36
- # Multiply by count of values
37
- weighted = avg_of_means * len(ownership_values)
38
-
39
- # Subtract (max - min)
40
- row_max = np.max(ownership_values)
41
- row_min = np.min(ownership_values)
42
- weighted = weighted - (row_max - row_min)
43
-
44
- # Convert back to percentage form
45
- return weighted * 10000
46
-
47
- def reassess_finish_percentile(row: pd.Series) -> float:
48
- own_diff = float(row['own_diff'])
49
- median_diff = float(row['median_diff'])
50
- finish_percentile = row['Finish_percentile'] + (own_diff / 200) - (median_diff / 100)
51
-
52
- return finish_percentile
53
-
54
- def reassess_dupes(row: pd.Series, salary_max: int) -> float:
55
- # Convert to signed integers to avoid uint16 wrap-around
56
- salary = int(row['salary'])
57
- salary_diff = int(row['salary_diff'])
58
- own_diff = float(row['own_diff'])
59
- dupes = int(row['Dupes'])
60
-
61
- # Calculate current distance from threshold
62
- threshold = salary_max - 500
63
- distance_from_threshold = salary - threshold
64
-
65
- # Check if salary crossed the threshold due to salary_diff
66
- original_salary = salary - salary_diff # What the salary was before the change
67
-
68
- if original_salary >= threshold and salary < threshold:
69
- # Salary crossed from above to below threshold - apply dramatic reduction
70
- reduction_factor = 2 ** (abs(distance_from_threshold) / 100)
71
- adjusted_dupes = dupes / reduction_factor
72
- elif distance_from_threshold < 0:
73
- # Already below threshold - apply reduction
74
- reduction_factor = 2 ** (abs(distance_from_threshold) / 100)
75
- adjusted_dupes = dupes / reduction_factor
76
  else:
77
- # Above threshold: use original formula
78
- if salary_diff > 0:
79
- adjusted_dupes = dupes + (
80
- (distance_from_threshold / 100) * (2 + (salary_diff / 200)) *
81
- (1 if salary_diff >= 0 else -1)
82
- )
83
- else:
84
- adjusted_dupes = dupes
85
-
86
- # Add ownership component
87
- final_dupes = max(0, adjusted_dupes + (own_diff / 10))
88
-
89
- return math.ceil(final_dupes)
90
-
91
- def reassess_lineup_edge(row: pd.Series, Contest_Size: int, prev_finish_percentile: float, prev_dupes: int) -> float:
92
- fp_rate = row['Finish_percentile'] / prev_finish_percentile
93
- if prev_dupes > 0:
94
- dupe_rate = row['Dupes'] / prev_dupes
95
- else:
96
- dupe_rate = min(row['Dupes'], 3)
97
- lineup_edge = (fp_rate * dupe_rate)
98
-
99
- return (row['Lineup Edge'] + (row['Lineup Edge'] * lineup_edge)) / 2
100
-
101
- def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame, maps_dict: dict, Contest_Size: int, salary_max: int) -> pd.DataFrame:
102
- orig_df = original_frame.copy()
103
- orig_df = orig_df.reset_index(drop=True)
104
- refactored_df = refactored_frame.copy()
105
- refactored_df = refactored_df.reset_index(drop=True)
106
-
107
- refactored_df['salary_diff'] = (refactored_df['salary'] - orig_df['salary']).astype('int16')
108
- refactored_df['median_diff'] = (refactored_df['median'] - orig_df['median']).astype('float32')
109
- refactored_df['own_diff'] = (refactored_df['Own'] - orig_df['Own']).astype('float32')
110
-
111
- change_mask = refactored_df[refactored_df['median_diff'] != 0]
112
-
113
- salary_col_index = refactored_df.columns.get_loc('salary')
114
- num_players = salary_col_index
115
- own_columns = [f'player_{i}_own' for i in range(1, num_players)]
116
-
117
- for col in range(num_players):
118
- refactored_df[f'player_{col}_own'] = refactored_df.iloc[:,col].map(maps_dict['own_map']).astype('float32') / 100
119
-
120
- for lineups in change_mask.index:
121
- prev_finish_percentile = refactored_df.loc[lineups, 'Finish_percentile']
122
- prev_dupes = refactored_df.loc[lineups, 'Dupes']
123
- refactored_df.loc[lineups, 'Dupes'] = reassess_dupes(refactored_df.loc[lineups, :], salary_max)
124
- refactored_df.loc[lineups, 'Finish_percentile'] = max(reassess_finish_percentile(refactored_df.loc[lineups, :]), .005 + refactored_df.loc[lineups, 'Win%'])
125
- refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
126
- refactored_df.loc[lineups, 'Lineup Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size, prev_finish_percentile, prev_dupes)
127
- refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_single_row(refactored_df.loc[lineups, own_columns])
128
- refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(), 1 / len(own_columns))
129
-
130
- refactored_df = refactored_df.drop(columns=['salary_diff', 'median_diff', 'own_diff', 'player_0_own'] + own_columns)
131
-
132
- return refactored_df
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
+ from global_func.predict_dupes import predict_dupes
 
 
3
 
4
+ def reassess_edge(modified_frame: pd.DataFrame, base_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str) -> pd.DataFrame:
5
  """
6
+ Reassess edge by re-inserting modified rows into the base frame, running predict_dupes,
7
+ and then extracting the updated modified rows.
8
 
9
  Args:
10
+ modified_frame: DataFrame with rows that were modified by exposure_spread
11
+ base_frame: Original base frame (base_frame for Portfolio, original export_base for Export)
12
+ maps_dict: Dictionary containing player mappings
13
+ site_var: Site variable (Draftkings/Fanduel)
14
+ type_var: Type variable (Classic/Showdown)
15
+ Contest_Size: Contest size for calculations
16
+ strength_var: Strength variable (Weak/Average/Sharp)
17
+ sport_var: Sport variable
18
 
19
  Returns:
20
+ DataFrame: Updated modified_frame with recalculated metrics
21
  """
22
+ # Create a copy of the base frame
23
+ combined_frame = base_frame.copy()
24
+
25
+ # Get the player columns (excluding salary, median, Own)
26
+ player_columns = [col for col in modified_frame.columns if col not in ['salary', 'median', 'Own']]
27
+
28
+ # For each modified row, find and replace the corresponding row in the combined frame
29
+ for idx, modified_row in modified_frame.iterrows():
30
+ # Find the row in combined_frame that matches the player composition
31
+ for combined_idx, combined_row in combined_frame.iterrows():
32
+ if all(modified_row[col] == combined_row[col] for col in player_columns):
33
+ # Replace the row with the modified version
34
+ combined_frame.loc[combined_idx] = modified_row
35
+ break
36
+
37
+ # Run predict_dupes on the combined frame
38
+ updated_combined_frame = predict_dupes(combined_frame, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var)
39
+
40
+ # Extract the updated modified rows
41
+ updated_modified_rows = []
42
+ for idx, modified_row in modified_frame.iterrows():
43
+ for combined_idx, combined_row in updated_combined_frame.iterrows():
44
+ if all(modified_row[col] == combined_row[col] for col in player_columns):
45
+ updated_modified_rows.append(combined_row)
46
+ break
47
+
48
+ # Convert back to DataFrame
49
+ if updated_modified_rows:
50
+ result_frame = pd.DataFrame(updated_modified_rows)
51
+ result_frame = result_frame.reset_index(drop=True)
52
+ return result_frame
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  else:
54
+ # If no matches found, return the original modified_frame
55
+ return modified_frame