James McCool commited on
Commit
b01e348
·
1 Parent(s): 1bb122a

Refactor app.py to replace predict_dupes calls with reassess_edge for both working and export frames, enhancing the accuracy of edge assessments by utilizing the updated reassess_edge function in reassess_edge.py.

Browse files
Files changed (2) hide show
  1. app.py +4 -4
  2. global_func/reassess_edge.py +46 -123
app.py CHANGED
@@ -1508,8 +1508,8 @@ with tab2:
1508
  st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
1509
  st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
1510
 
1511
- st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1512
- # st.session_state['working_frame'] = reassess_edge(st.session_state['working_frame'], prior_frame, st.session_state['map_dict'], Contest_Size, salary_max)
1513
  st.session_state['export_merge'] = st.session_state['working_frame'].copy()
1514
  elif exp_submitted:
1515
  st.session_state['settings_base'] = False
@@ -1606,8 +1606,8 @@ with tab2:
1606
  st.session_state['export_base']['median'] = st.session_state['export_base']['median'].astype('float32')
1607
  st.session_state['export_base']['salary'] = st.session_state['export_base']['salary'].astype('uint16')
1608
 
1609
- st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1610
- # st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], prior_frame, st.session_state['map_dict'], Contest_Size, salary_max)
1611
  st.session_state['export_merge'] = st.session_state['export_base'].copy()
1612
 
1613
  with st.container():
 
1508
  st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
1509
  st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
1510
 
1511
+ # st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1512
+ st.session_state['working_frame'] = reassess_edge(st.session_state['working_frame'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1513
  st.session_state['export_merge'] = st.session_state['working_frame'].copy()
1514
  elif exp_submitted:
1515
  st.session_state['settings_base'] = False
 
1606
  st.session_state['export_base']['median'] = st.session_state['export_base']['median'].astype('float32')
1607
  st.session_state['export_base']['salary'] = st.session_state['export_base']['salary'].astype('uint16')
1608
 
1609
+ # st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1610
+ st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
1611
  st.session_state['export_merge'] = st.session_state['export_base'].copy()
1612
 
1613
  with st.container():
global_func/reassess_edge.py CHANGED
@@ -1,132 +1,55 @@
1
- # Rerunning the predict_dupes.py function on a small portfolio after running exposure_spread.py breaks the metrics
2
- # This is because the predict_dupes.py function is exclusive of the lineups it takes in, and is meant to give edge around the median expectation of those lineups
3
- # So what we need to do instead is find the differences across the original set and the post-exposure_spread.py set and calculate new metrics around the diff
4
- # Need to find the diff in salary, median, and Own to calculate new Dupes, finish_percentile, Win%, Edge, Weighted Own, and Geomean
5
- # Then at the end run the Diversity function to set a new column for Diversity
6
- # This way we only make the changes to the metrics where we have a difference in salary, median, and Own and leave the rest of the lineups alone
7
-
8
  import pandas as pd
9
- import numpy as np
10
- import math
11
- import streamlit as st
12
 
13
- def calculate_weighted_ownership_single_row(row_ownerships):
14
  """
15
- Calculate weighted ownership for a single row of ownership values.
 
16
 
17
  Args:
18
- row_ownerships: Series containing ownership values in percentage form
 
 
 
 
 
 
 
19
 
20
  Returns:
21
- float: Calculated weighted ownership value
22
  """
23
-
24
- ownership_values = pd.to_numeric(row_ownerships.values, errors='coerce')
25
- ownership_values = np.where(np.isnan(ownership_values), 0, ownership_values) / 100
26
-
27
- # Calculate mean
28
- row_mean = np.mean(ownership_values)
29
-
30
- # Calculate average of each value with the overall mean
31
- value_means = (ownership_values + row_mean) / 2
32
-
33
- # Take average of all those means
34
- avg_of_means = np.mean(value_means)
35
-
36
- # Multiply by count of values
37
- weighted = avg_of_means * len(ownership_values)
38
-
39
- # Subtract (max - min)
40
- row_max = np.max(ownership_values)
41
- row_min = np.min(ownership_values)
42
- weighted = weighted - (row_max - row_min)
43
-
44
- # Convert back to percentage form
45
- return weighted * 10000
46
-
47
- def reassess_finish_percentile(row: pd.Series) -> float:
48
- own_diff = float(row['own_diff'])
49
- median_diff = float(row['median_diff'])
50
- finish_percentile = row['Finish_percentile'] + (own_diff / 200) - (median_diff / 100)
51
-
52
- return finish_percentile
53
-
54
- def reassess_dupes(row: pd.Series, salary_max: int) -> float:
55
- # Convert to signed integers to avoid uint16 wrap-around
56
- salary = int(row['salary'])
57
- salary_diff = int(row['salary_diff'])
58
- own_diff = float(row['own_diff'])
59
- dupes = int(row['Dupes'])
60
-
61
- # Calculate current distance from threshold
62
- threshold = salary_max - 500
63
- distance_from_threshold = salary - threshold
64
-
65
- # Check if salary crossed the threshold due to salary_diff
66
- original_salary = salary - salary_diff # What the salary was before the change
67
-
68
- if original_salary >= threshold and salary < threshold:
69
- # Salary crossed from above to below threshold - apply dramatic reduction
70
- reduction_factor = 2 ** (abs(distance_from_threshold) / 100)
71
- adjusted_dupes = dupes / reduction_factor
72
- elif distance_from_threshold < 0:
73
- # Already below threshold - apply reduction
74
- reduction_factor = 2 ** (abs(distance_from_threshold) / 100)
75
- adjusted_dupes = dupes / reduction_factor
76
  else:
77
- # Above threshold: use original formula
78
- if salary_diff > 0:
79
- adjusted_dupes = dupes + (
80
- (distance_from_threshold / 100) * (2 + (salary_diff / 200)) *
81
- (1 if salary_diff >= 0 else -1)
82
- )
83
- else:
84
- adjusted_dupes = dupes
85
-
86
- # Add ownership component
87
- final_dupes = max(0, adjusted_dupes + (own_diff / 10))
88
-
89
- return math.ceil(final_dupes)
90
-
91
- def reassess_lineup_edge(row: pd.Series, Contest_Size: int, prev_finish_percentile: float, prev_dupes: int) -> float:
92
- fp_rate = row['Finish_percentile'] / prev_finish_percentile
93
- if prev_dupes > 0:
94
- dupe_rate = row['Dupes'] / prev_dupes
95
- else:
96
- dupe_rate = min(row['Dupes'], 3)
97
- lineup_edge = (fp_rate * dupe_rate)
98
-
99
- return (row['Lineup Edge'] + (row['Lineup Edge'] * lineup_edge)) / 2
100
-
101
- def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame, maps_dict: dict, Contest_Size: int, salary_max: int) -> pd.DataFrame:
102
- orig_df = original_frame.copy()
103
- orig_df = orig_df.reset_index(drop=True)
104
- refactored_df = refactored_frame.copy()
105
- refactored_df = refactored_df.reset_index(drop=True)
106
-
107
- refactored_df['salary_diff'] = (refactored_df['salary'] - orig_df['salary']).astype('int16')
108
- refactored_df['median_diff'] = (refactored_df['median'] - orig_df['median']).astype('float32')
109
- refactored_df['own_diff'] = (refactored_df['Own'] - orig_df['Own']).astype('float32')
110
-
111
- change_mask = refactored_df[refactored_df['median_diff'] != 0]
112
-
113
- salary_col_index = refactored_df.columns.get_loc('salary')
114
- num_players = salary_col_index
115
- own_columns = [f'player_{i}_own' for i in range(1, num_players)]
116
-
117
- for col in range(num_players):
118
- refactored_df[f'player_{col}_own'] = refactored_df.iloc[:,col].map(maps_dict['own_map']).astype('float32') / 100
119
-
120
- for lineups in change_mask.index:
121
- prev_finish_percentile = refactored_df.loc[lineups, 'Finish_percentile']
122
- prev_dupes = refactored_df.loc[lineups, 'Dupes']
123
- refactored_df.loc[lineups, 'Dupes'] = reassess_dupes(refactored_df.loc[lineups, :], salary_max)
124
- refactored_df.loc[lineups, 'Finish_percentile'] = max(reassess_finish_percentile(refactored_df.loc[lineups, :]), .005 + refactored_df.loc[lineups, 'Win%'])
125
- refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
126
- refactored_df.loc[lineups, 'Lineup Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size, prev_finish_percentile, prev_dupes)
127
- refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_single_row(refactored_df.loc[lineups, own_columns])
128
- refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(), 1 / len(own_columns))
129
-
130
- refactored_df = refactored_df.drop(columns=['salary_diff', 'median_diff', 'own_diff', 'player_0_own'] + own_columns)
131
-
132
- return refactored_df
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
+ from global_func.predict_dupes import predict_dupes
 
 
3
 
4
+ def reassess_edge(modified_frame: pd.DataFrame, base_frame: pd.DataFrame, maps_dict: dict, site_var: str, type_var: str, Contest_Size: int, strength_var: str, sport_var: str) -> pd.DataFrame:
5
  """
6
+ Reassess edge by re-inserting modified rows into the base frame, running predict_dupes,
7
+ and then extracting the updated modified rows.
8
 
9
  Args:
10
+ modified_frame: DataFrame with rows that were modified by exposure_spread
11
+ base_frame: Original base frame (base_frame for Portfolio, original export_base for Export)
12
+ maps_dict: Dictionary containing player mappings
13
+ site_var: Site variable (Draftkings/Fanduel)
14
+ type_var: Type variable (Classic/Showdown)
15
+ Contest_Size: Contest size for calculations
16
+ strength_var: Strength variable (Weak/Average/Sharp)
17
+ sport_var: Sport variable
18
 
19
  Returns:
20
+ DataFrame: Updated modified_frame with recalculated metrics
21
  """
22
+ # Create a copy of the base frame
23
+ combined_frame = base_frame.copy()
24
+
25
+ # Get the player columns (excluding salary, median, Own)
26
+ player_columns = [col for col in modified_frame.columns if col not in ['salary', 'median', 'Own']]
27
+
28
+ # For each modified row, find and replace the corresponding row in the combined frame
29
+ for idx, modified_row in modified_frame.iterrows():
30
+ # Find the row in combined_frame that matches the player composition
31
+ for combined_idx, combined_row in combined_frame.iterrows():
32
+ if all(modified_row[col] == combined_row[col] for col in player_columns):
33
+ # Replace the row with the modified version
34
+ combined_frame.loc[combined_idx] = modified_row
35
+ break
36
+
37
+ # Run predict_dupes on the combined frame
38
+ updated_combined_frame = predict_dupes(combined_frame, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var)
39
+
40
+ # Extract the updated modified rows
41
+ updated_modified_rows = []
42
+ for idx, modified_row in modified_frame.iterrows():
43
+ for combined_idx, combined_row in updated_combined_frame.iterrows():
44
+ if all(modified_row[col] == combined_row[col] for col in player_columns):
45
+ updated_modified_rows.append(combined_row)
46
+ break
47
+
48
+ # Convert back to DataFrame
49
+ if updated_modified_rows:
50
+ result_frame = pd.DataFrame(updated_modified_rows)
51
+ result_frame = result_frame.reset_index(drop=True)
52
+ return result_frame
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  else:
54
+ # If no matches found, return the original modified_frame
55
+ return modified_frame