James McCool committed on
Commit
587326c
·
1 Parent(s): beef2ec

Refactor player removal and portfolio filtering logic in distribute_preset.py to improve accuracy in lineup generation. This update introduces a mechanism to continuously remove high-exposure players and ensures that the final portfolio meets the lineup target while maintaining performance metrics.

Browse files
Files changed (1) hide show
  1. global_func/distribute_preset.py +69 -64
global_func/distribute_preset.py CHANGED
def distribute_preset(portfolio: pd.DataFrame, lineup_target: int, exclude_cols: list) -> pd.DataFrame:
    """Select up to ``lineup_target`` lineups from ``portfolio``, iteratively
    removing players whose exposure exceeds 60%.

    For each similarity-slack level (1..19), the portfolio is filtered per
    'Stack' (team): lineups sorted by descending 'median' are kept only while
    their 'Similarity Score' stays within a slack band of the running maximum.
    The per-team survivors are concatenated; once the target count is reached,
    player exposures are computed and any player appearing in more than 60% of
    the selected lineups is removed from consideration and the whole search
    restarts. The loop ends when no new removals occur.

    Parameters
    ----------
    portfolio : pd.DataFrame
        Candidate lineups. Must contain 'Stack', 'median' and
        'Similarity Score' columns; all columns not in the internal
        ``excluded_cols`` list are treated as player slots.
    lineup_target : int
        Desired number of lineups in the returned portfolio.
    exclude_cols : list
        NOTE(review): unused by this implementation (an internal
        ``excluded_cols`` list is hard-coded below); annotation guessed as
        ``list`` — the original signature is truncated in the diff. Kept for
        interface compatibility.

    Returns
    -------
    pd.DataFrame
        Selected lineups sorted by 'median' descending. May contain fewer
        than ``lineup_target`` rows (or be empty) when removals exhaust the
        candidate pool.
    """
    excluded_cols = ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Stack', 'Size', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Similarity Score']
    player_columns = [col for col in portfolio.columns if col not in excluded_cols]
    player_remove_list = []

    concat_portfolio = pd.DataFrame(columns=portfolio.columns)

    while True:  # Continue until no more players need to be removed
        for slack_var in range(1, 20):
            concat_portfolio = pd.DataFrame(columns=portfolio.columns)

            # Start with the original portfolio, removing players from player_remove_list
            working_portfolio = portfolio.copy()

            # Remove all lineups containing any banned player in one pass
            if player_remove_list:
                remove_mask = working_portfolio[player_columns].apply(
                    lambda row: not any(player in list(row) for player in player_remove_list), axis=1
                )
                working_portfolio = working_portfolio[remove_mask]

            if len(working_portfolio) == 0:
                # No data left after removing players: return what we have
                return concat_portfolio.sort_values(by='median', ascending=False)

            # Apply similarity score filtering by team
            for team in working_portfolio['Stack'].unique():
                team_portfolio = working_portfolio[working_portfolio['Stack'] == team].sort_values(by='median', ascending=False)
                team_portfolio = team_portfolio.reset_index(drop=True)

                if len(team_portfolio) == 0:
                    continue

                rows_to_drop = []
                # Best lineup seeds the allowed similarity band; the band widens
                # with slack_var (slack_var/20 of the current reference score).
                curr_own_type_max = team_portfolio.loc[0, 'Similarity Score'] + (slack_var / 20 * team_portfolio.loc[0, 'Similarity Score'])

                for i in range(1, len(team_portfolio)):
                    sim_score = team_portfolio.loc[i, 'Similarity Score']
                    if sim_score > curr_own_type_max:
                        rows_to_drop.append(i)
                    else:
                        # Accepted lineup becomes the new band reference
                        curr_own_type_max = sim_score + (slack_var / 20 * sim_score)

                team_portfolio = team_portfolio.drop(rows_to_drop).reset_index(drop=True)
                concat_portfolio = pd.concat([concat_portfolio, team_portfolio.head(math.ceil(lineup_target / 5))])

            if len(concat_portfolio) >= lineup_target:
                concat_portfolio = concat_portfolio.sort_values(by='median', ascending=False).head(lineup_target)
                break

        # Calculate player exposures from the current concat_portfolio
        summary_cols = [col for col in concat_portfolio.columns if col not in excluded_cols]
        player_list = set()
        for col in summary_cols:
            player_list.update(concat_portfolio[col].unique())

        player_stats = []
        for player in player_list:
            player_mask = concat_portfolio[summary_cols].apply(
                lambda row: player in list(row), axis=1
            )
            if player_mask.any():
                # len(concat_portfolio) > 0 is guaranteed here: a non-empty
                # player_mask.any() implies at least one row exists.
                player_stats.append({
                    'Player': player,
                    'Lineup Count': player_mask.sum(),
                    'Exposure': player_mask.sum() / len(concat_portfolio)
                })

        player_summary = pd.DataFrame(player_stats)
        if player_summary.empty:
            # Fix: an empty summary has no 'Lineup Count'/'Exposure' columns
            # (sort_values would raise KeyError) and nothing can change on
            # another pass — stop here.
            break

        print(player_summary.sort_values('Lineup Count', ascending=False).head(10))

        # Find players with exposure > 0.60
        high_exposure_players = player_summary[player_summary['Exposure'] > 0.60]['Player'].tolist()

        # Fix: only count genuinely new removals, and stop as soon as none
        # occur. The previous condition (`no removals AND target met`) spun
        # forever when the target was unreachable, because the state never
        # changed between iterations.
        new_removals = [p for p in high_exposure_players if p not in player_remove_list]
        player_remove_list.extend(new_removals)

        if not new_removals:
            break

    return concat_portfolio.sort_values(by='median', ascending=False)