Commit 1748ccd · 1 Parent(s): 89f3a60
James McCool committed

Add stack exposure calculations and refactor player exposure handling in app.py

- Introduced a create_stack_exposures function that computes stack exposure rates overall and within the top 1%, 5%, 10%, and 20% of finishes, centralizing logic that was previously duplicated inline.
- Updated app.py to call the new function in both the 'All' and per-entrant branches, tightening the stack exposure presentation.
- Refactored player exposure handling around new session state variables for the field-wide player and stack frames (field_player_frame, field_stack_frame), which back the new 'Relation to the field' view; a sketch of that view follows below.
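Mechanically, the 'Relation to the field' view is a plain DataFrame subtraction: the entrant's exposure frame minus the field-wide frame cached in session state. A minimal sketch, assuming both frames align on the same index and carry only numeric exposure columns; the names and values here are invented, not taken from the app:

import pandas as pd

# Hypothetical miniature exposure frames; in app.py these come from
# create_player_exposures / create_stack_exposures, with the field-wide
# versions cached as st.session_state['field_player_frame'] and
# st.session_state['field_stack_frame'].
entrant_frame = pd.DataFrame({'Exposure Overall': [0.40, 0.10]}, index=['LAD', 'NYY'])
field_frame = pd.DataFrame({'Exposure Overall': [0.25, 0.20]}, index=['LAD', 'NYY'])

# Positive values: the entrant is heavier than the field; negative: lighter.
print(entrant_frame - field_frame)
#      Exposure Overall
# LAD              0.15
# NYY             -0.10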

app.py CHANGED
@@ -11,6 +11,7 @@ from global_func.load_contest_file import load_contest_file
 from global_func.load_file import load_file
 from global_func.find_name_mismatches import find_name_mismatches
 from global_func.create_player_exposures import create_player_exposures
+from global_func.create_stack_exposures import create_stack_exposures
 
 player_exposure_format = {'Exposure Overall': '{:.2%}', 'Exposure Top 1%': '{:.2%}', 'Exposure Top 5%': '{:.2%}', 'Exposure Top 10%': '{:.2%}', 'Exposure Top 20%': '{:.2%}'}
 if 'calc_toggle' not in st.session_state:
@@ -171,6 +172,8 @@ with tab2:
     working_df['percentile_finish'] = working_df['index'].rank(pct=True)
     working_df['finish'] = working_df['index']
     working_df = working_df.drop(['sorted', 'index'], axis=1)
+    st.session_state['field_player_frame'] = create_player_exposures(working_df, player_columns)
+    st.session_state['field_stack_frame'] = create_stack_exposures(working_df)
 
     with st.expander("Info and filters"):
         if st.button('Clear data', key='reset3'):
@@ -237,81 +240,54 @@ with tab2:
     with st.container():
         tab1, tab2, tab3 = st.tabs(['Player Used Info', 'Stack Used Info', 'Duplication Info'])
         with tab1:
-            st.session_state['field_frame'] = create_player_exposures(working_df, player_columns)
+            player_view_var = st.radio("View Exposures by:", ['Percentage used', 'Relation to the field'], key='player_view_var')
+
             if entry_parse_var == 'All':
                 st.session_state['player_frame'] = create_player_exposures(working_df, player_columns)
-                st.dataframe(st.session_state['player_frame'].
+                if player_view_var == 'Percentage used':
+                    player_frame_display = st.session_state['player_frame']
+                elif player_view_var == 'Relation to the field':
+                    player_frame_display = st.session_state['player_frame'] - st.session_state['field_player_frame']
+                st.dataframe(player_frame_display.
                              sort_values(by='Exposure Overall', ascending=False).
                              style.background_gradient(cmap='RdYlGn').
-                             format(formatter='{:.2%}', subset=st.session_state['player_frame'].select_dtypes(include=['number']).columns),
+                             format(formatter='{:.2%}', subset=player_frame_display.select_dtypes(include=['number']).columns),
                              hide_index=True)
             else:
                 st.session_state['player_frame'] = create_player_exposures(working_df, player_columns, entry_names)
-                st.dataframe(st.session_state['player_frame'].
+                if player_view_var == 'Percentage used':
+                    player_frame_display = st.session_state['player_frame']
+                elif player_view_var == 'Relation to the field':
+                    player_frame_display = st.session_state['player_frame'] - st.session_state['field_player_frame']
+                st.dataframe(player_frame_display.
                             sort_values(by='Exposure Overall', ascending=False).
                             style.background_gradient(cmap='RdYlGn').
-                            format(formatter='{:.2%}', subset=st.session_state['player_frame'].select_dtypes(include=['number']).columns),
+                            format(formatter='{:.2%}', subset=player_frame_display.select_dtypes(include=['number']).columns),
                             hide_index=True)
         with tab2:
+            stack_view_var = st.radio('View Stack Exposures by:', ['Percentage used', 'Relation to the field'], key='stack_view_var')
+
             if entry_parse_var == 'All':
-                overall_stacks = pd.Series(list(working_df['stack'])).value_counts()
-                top_1per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.01]['stack'])).value_counts()
-                top_5per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.05]['stack'])).value_counts()
-                top_10per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.10]['stack'])).value_counts()
-                top_20per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.20]['stack'])).value_counts()
-                stacks_contest_len = len(working_df)
-                stacks_len_1per = len(working_df[working_df['percentile_finish'] <= 0.01])
-                stacks_len_5per = len(working_df[working_df['percentile_finish'] <= 0.05])
-                stacks_len_10per = len(working_df[working_df['percentile_finish'] <= 0.10])
-                stacks_len_20per = len(working_df[working_df['percentile_finish'] <= 0.20])
-                each_set_name = ['Overall', ' Top 1%', ' Top 5%', 'Top 10%', 'Top 20%']
-                each_stacks_set = [overall_stacks, top_1per_stacks, top_5per_stacks, top_10per_stacks, top_20per_stacks]
-                each_stacks_len_set = [stacks_contest_len, stacks_len_1per, stacks_len_5per, stacks_len_10per, stacks_len_20per]
-                stack_count_var = 0
-                for each_stack in each_stacks_set:
-                    stack_frame = each_stack.to_frame().reset_index().rename(columns={'index': 'Stack', 'count': 'Count'})
-                    stack_frame['Percent'] = stack_frame['Count'] / each_stacks_len_set[stack_count_var]
-                    stack_frame = stack_frame[['Stack', 'Percent']]
-                    stack_frame = stack_frame.rename(columns={'Percent': f'Exposure {each_set_name[stack_count_var]}'})
-                    if 'stack_frame' not in st.session_state:
-                        st.session_state['stack_frame'] = stack_frame
-                    else:
-                        st.session_state['stack_frame'] = pd.merge(st.session_state['stack_frame'], stack_frame, on='Stack', how='outer')
-                    stack_count_var += 1
-                st.dataframe(st.session_state['stack_frame'].
+                st.session_state['stack_frame'] = create_stack_exposures(working_df)
+                if stack_view_var == 'Percentage used':
+                    stack_frame_display = st.session_state['stack_frame']
+                elif stack_view_var == 'Relation to the field':
+                    stack_frame_display = st.session_state['stack_frame'] - st.session_state['field_stack_frame']
+                st.dataframe(stack_frame_display.
                             sort_values(by='Exposure Overall', ascending=False).
                             style.background_gradient(cmap='RdYlGn').
-                            format(formatter='{:.2%}', subset=st.session_state['stack_frame'].select_dtypes(include=['number']).columns),
+                            format(formatter='{:.2%}', subset=stack_frame_display.select_dtypes(include=['number']).columns),
                            hide_index=True)
             else:
-                overall_stacks = pd.Series(list(working_df[working_df['BaseName'].isin(entry_names)]['stack'])).value_counts()
-                top_1per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.01]['stack'])).value_counts()
-                top_5per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.05]['stack'])).value_counts()
-                top_10per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.10]['stack'])).value_counts()
-                top_20per_stacks = pd.Series(list(working_df[working_df['percentile_finish'] <= 0.20]['stack'])).value_counts()
-                stacks_contest_len = len(working_df)
-                stacks_len_1per = len(working_df[working_df['percentile_finish'] <= 0.01])
-                stacks_len_5per = len(working_df[working_df['percentile_finish'] <= 0.05])
-                stacks_len_10per = len(working_df[working_df['percentile_finish'] <= 0.10])
-                stacks_len_20per = len(working_df[working_df['percentile_finish'] <= 0.20])
-                each_set_name = ['Overall', ' Top 1%', ' Top 5%', 'Top 10%', 'Top 20%']
-                each_stacks_set = [overall_stacks, top_1per_stacks, top_5per_stacks, top_10per_stacks, top_20per_stacks]
-                each_stacks_len_set = [stacks_contest_len, stacks_len_1per, stacks_len_5per, stacks_len_10per, stacks_len_20per]
-                stack_count_var = 0
-                for each_stack in each_stacks_set:
-                    stack_frame = each_stack.to_frame().reset_index().rename(columns={'index': 'Stack', 'count': 'Count'})
-                    stack_frame['Percent'] = stack_frame['Count'] / each_stacks_len_set[stack_count_var]
-                    stack_frame = stack_frame[['Stack', 'Percent']]
-                    stack_frame = stack_frame.rename(columns={'Percent': f'Exposure {each_set_name[stack_count_var]}'})
-                    if 'stack_frame' not in st.session_state:
-                        st.session_state['stack_frame'] = stack_frame
-                    else:
-                        st.session_state['stack_frame'] = pd.merge(st.session_state['stack_frame'], stack_frame, on='Stack', how='outer')
-                    stack_count_var += 1
-                st.dataframe(st.session_state['stack_frame'].
+                st.session_state['stack_frame'] = create_stack_exposures(working_df, entry_names)
+                if stack_view_var == 'Percentage used':
+                    stack_frame_display = st.session_state['stack_frame']
+                elif stack_view_var == 'Relation to the field':
+                    stack_frame_display = st.session_state['stack_frame'] - st.session_state['field_stack_frame']
+                st.dataframe(stack_frame_display.
                             sort_values(by='Exposure Overall', ascending=False).
                             style.background_gradient(cmap='RdYlGn').
-                            format(formatter='{:.2%}', subset=st.session_state['stack_frame'].select_dtypes(include=['number']).columns),
+                            format(formatter='{:.2%}', subset=stack_frame_display.select_dtypes(include=['number']).columns),
                            hide_index=True)
         with tab3:
             st.write('holding')
global_func/clean_player_name.py DELETED
@@ -1,16 +0,0 @@
-import streamlit as st
-import numpy as np
-import pandas as pd
-import time
-from fuzzywuzzy import process
-
-def clean_player_name(name):
-    # Handle colon case first (remove everything before colon)
-    if ':' in name:
-        name = name.split(':')[1].strip()
-
-    # Handle parentheses case (remove everything after opening parenthesis)
-    if '(' in name:
-        name = name.split('(')[0].strip()
-
-    return name
global_func/create_stack_exposures.py ADDED
@@ -0,0 +1,33 @@
+import pandas as pd
+
+def create_stack_exposures(df: pd.DataFrame, entrants: list = None):
+    stack_exposures = pd.DataFrame()
+    if entrants is not None:
+        overall_stacks = pd.Series(list(df[df['BaseName'].isin(entrants)]['stack'])).value_counts()
+    else:
+        overall_stacks = pd.Series(list(df['stack'])).value_counts()
+    top_1per_stacks = pd.Series(list(df[df['percentile_finish'] <= 0.01]['stack'])).value_counts()
+    top_5per_stacks = pd.Series(list(df[df['percentile_finish'] <= 0.05]['stack'])).value_counts()
+    top_10per_stacks = pd.Series(list(df[df['percentile_finish'] <= 0.10]['stack'])).value_counts()
+    top_20per_stacks = pd.Series(list(df[df['percentile_finish'] <= 0.20]['stack'])).value_counts()
+    stacks_contest_len = len(df)
+    stacks_len_1per = len(df[df['percentile_finish'] <= 0.01])
+    stacks_len_5per = len(df[df['percentile_finish'] <= 0.05])
+    stacks_len_10per = len(df[df['percentile_finish'] <= 0.10])
+    stacks_len_20per = len(df[df['percentile_finish'] <= 0.20])
+    each_set_name = ['Overall', ' Top 1%', ' Top 5%', 'Top 10%', 'Top 20%']
+    each_stacks_set = [overall_stacks, top_1per_stacks, top_5per_stacks, top_10per_stacks, top_20per_stacks]
+    each_stacks_len_set = [stacks_contest_len, stacks_len_1per, stacks_len_5per, stacks_len_10per, stacks_len_20per]
+    stack_count_var = 0
+    for each_stack in each_stacks_set:
+        stack_frame = each_stack.to_frame().reset_index().rename(columns={'index': 'Stack', 'count': 'Count'})
+        stack_frame['Percent'] = stack_frame['Count'] / each_stacks_len_set[stack_count_var]
+        stack_frame = stack_frame[['Stack', 'Percent']]
+        stack_frame = stack_frame.rename(columns={'Percent': f'Exposure {each_set_name[stack_count_var]}'})
+        if len(stack_exposures) == 0:
+            stack_exposures = stack_frame
+        else:
+            stack_exposures = pd.merge(stack_exposures, stack_frame, on='Stack', how='outer')
+        stack_count_var += 1
+
+    return stack_exposures
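For orientation, a minimal usage sketch of the new helper against an invented contest frame; the column names 'BaseName', 'stack', and 'percentile_finish' are the ones the function body reads, while the data and user names are made up:

import pandas as pd
from global_func.create_stack_exposures import create_stack_exposures

# Toy contest: one row per lineup entry.
df = pd.DataFrame({
    'BaseName': ['userA', 'userA', 'userB', 'userC'],
    'stack': ['LAD', 'NYY', 'LAD', 'LAD'],
    'percentile_finish': [0.005, 0.30, 0.04, 0.75],
})

field_exposures = create_stack_exposures(df)                        # whole field
entrant_exposures = create_stack_exposures(df, entrants=['userA'])  # one entrant

Two behaviors are worth flagging for review: only the 'Exposure Overall' column respects the entrants filter (the percentile columns are always computed over the full df, mirroring the inline code this commit replaces), and the leading spaces in each_set_name yield doubled spaces in the 'Exposure  Top 1%' and 'Exposure  Top 5%' column names, which would not match the single-spaced keys in app.py's player_exposure_format dict if that formatter were ever applied to this frame.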
global_func/find_csv_mismatches.py DELETED
@@ -1,93 +0,0 @@
-import streamlit as st
-import numpy as np
-import pandas as pd
-from fuzzywuzzy import process
-
-def find_csv_mismatches(csv_df, projections_df):
-    # Create copies of the dataframes to avoid modifying the originals
-    csv_df = csv_df.copy()
-    projections_df = projections_df.copy()
-
-    if 'Name' not in csv_df.columns:
-        st.error("No 'Name' column found in CSV file")
-        return csv_df
-
-    if 'player_names' not in projections_df.columns:
-        st.error("No 'player_names' column found in projections file")
-        return csv_df
-
-    # Get unique player names from CSV and projections
-    csv_players = set(csv_df['Name'].dropna().unique())
-    projection_players = set(projections_df['player_names'].unique())
-    projection_players_list = list(csv_players)
-
-    # Find players in CSV that are missing from projections
-    players_missing_from_projections = list(projection_players - csv_players)
-
-    # Automatically handle 100% matches before starting interactive process
-    players_to_process = []
-    for player in players_missing_from_projections:
-        if not isinstance(player, str):
-            st.warning(f"Skipping non-string value: {player}")
-            continue
-        closest_matches = process.extract(player, projection_players_list, limit=1)
-        if closest_matches[0][1] == 100:  # If perfect match found
-            match_name = closest_matches[0][0]
-            # Update CSV DataFrame to use the projection name
-            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
-            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
-        else:
-            players_to_process.append(player)
-
-    # Initialize session state for tracking current player if not exists
-    if 'csv_current_player_index' not in st.session_state:
-        st.session_state.csv_current_player_index = 0
-        st.session_state.csv_players_to_process = players_to_process
-
-    # Display results
-    if players_missing_from_projections:
-        st.warning("Players in CSV but missing from projections")
-
-        # Display remaining players
-        remaining_players = st.session_state.csv_players_to_process[st.session_state.csv_current_player_index:]
-        st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
-                "\n".join(f"- {player}" for player in remaining_players))
-
-        if st.session_state.csv_current_player_index < len(st.session_state.csv_players_to_process):
-            current_player = st.session_state.csv_players_to_process[st.session_state.csv_current_player_index]
-
-            # Find the top 3 closest matches
-            closest_matches = process.extract(current_player, projection_players_list, limit=3)
-
-            st.write(f"**Missing Player {st.session_state.csv_current_player_index + 1} of {len(st.session_state.csv_players_to_process)}:** {current_player}")
-
-            # Create radio buttons for selection
-            options = [f"{match[0]} ({match[1]}%)" for match in closest_matches]
-            options.append("None of these")
-
-            selected_option = st.radio(
-                f"Select correct match:",
-                options,
-                key=f"csv_radio_{current_player}"
-            )
-
-            if st.button("Confirm Selection", key="csv_confirm"):
-                if selected_option != "None of these":
-                    selected_name = selected_option.split(" (")[0]
-                    # Update CSV DataFrame
-                    csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
-                    st.success(f"Replaced '{current_player}' with '{selected_name}'")
-                    st.session_state['csv_file'] = csv_df
-
-                # Move to next player
-                st.session_state.csv_current_player_index += 1
-                st.rerun()
-        else:
-            st.success("All players have been processed!")
-            # Reset the index for future runs
-            st.session_state.csv_current_player_index = 0
-            st.session_state.csv_players_to_process = []
-    else:
-        st.success("All CSV players found in projections!")
-
-    return csv_df
global_func/highlight_rows.py DELETED
@@ -1,29 +0,0 @@
-import streamlit as st
-import numpy as np
-import pandas as pd
-import time
-from fuzzywuzzy import process
-
-def highlight_changes(row):
-    original_row = st.session_state['portfolio'].iloc[row.name]
-    colors = [''] * len(row)
-    for i, (orig, new) in enumerate(zip(original_row, row)):
-        if orig != new:
-            colors[i] = 'background-color: yellow'
-    return colors
-
-def highlight_changes_winners(row):
-    original_row = st.session_state['optimized_df_medians'].iloc[row.name]
-    colors = [''] * len(row)
-    for i, (orig, new) in enumerate(zip(original_row, row)):
-        if orig != new:
-            colors[i] = 'background-color: aqua'
-    return colors
-
-def highlight_changes_losers(row):
-    original_row = st.session_state['optimized_df_winners'].iloc[row.name]
-    colors = [''] * len(row)
-    for i, (orig, new) in enumerate(zip(original_row, row)):
-        if orig != new:
-            colors[i] = 'background-color: darksalmon'
-    return colors
global_func/load_csv.py DELETED
@@ -1,24 +0,0 @@
-import streamlit as st
-import numpy as np
-import pandas as pd
-import time
-from fuzzywuzzy import process
-
-def load_csv(upload):
-    if upload is not None:
-        try:
-            if upload.name.endswith('.csv'):
-                df = pd.read_csv(upload)
-                try:
-                    df['Name + ID'] = df['Name'] + ' (' + df['ID'].astype(str) + ')'
-                except:
-                    pass
-            else:
-                st.error('Please upload either a CSV or Excel file')
-                return None
-
-            return df
-        except Exception as e:
-            st.error(f'Error loading file: {str(e)}')
-            return None
-    return None
global_func/load_ss_file.py DELETED
@@ -1,34 +0,0 @@
-import streamlit as st
-import numpy as np
-import pandas as pd
-import time
-from fuzzywuzzy import process
-
-def load_ss_file(lineups, csv_file):
-    df = csv_file.copy()
-    try:
-        name_dict = dict(zip(df['ID'], df['Name']))
-    except:
-        name_dict = dict(zip(df['Id'], df['Nickname']))
-
-    # Now load and process the lineups file
-    try:
-        if lineups.name.endswith('.csv'):
-            lineups_df = pd.read_csv(lineups)
-        elif lineups.name.endswith(('.xls', '.xlsx')):
-            lineups_df = pd.read_excel(lineups)
-        else:
-            st.error('Please upload either a CSV or Excel file for lineups')
-            return None, None
-
-        export_df = lineups_df.copy()
-
-        # Map the IDs to names
-        for col in lineups_df.columns:
-            lineups_df[col] = lineups_df[col].map(name_dict)
-
-        return export_df, lineups_df
-
-    except Exception as e:
-        st.error(f'Error loading lineups file: {str(e)}')
-        return None, None
global_func/optimize_lineup.py DELETED
@@ -1,74 +0,0 @@
-import streamlit as st
-import numpy as np
-import pandas as pd
-import time
-from fuzzywuzzy import process
-
-def optimize_lineup(row):
-    current_lineup = []
-    total_salary = 0
-    salary_cap = 50000
-    used_players = set()
-
-    # Convert row to dictionary with roster positions
-    roster = {}
-    for col, player in zip(row.index, row):
-        if col not in ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Lineup Edge']:
-            roster[col] = {
-                'name': player,
-                'position': map_dict['pos_map'].get(player, '').split('/'),
-                'team': map_dict['team_map'].get(player, ''),
-                'salary': map_dict['salary_map'].get(player, 0),
-                'median': map_dict['proj_map'].get(player, 0),
-                'ownership': map_dict['own_map'].get(player, 0)
-            }
-            total_salary += roster[col]['salary']
-            used_players.add(player)
-
-    # Optimize each roster position in random order
-    roster_positions = list(roster.items())
-    random.shuffle(roster_positions)
-
-    for roster_pos, current in roster_positions:
-        # Skip optimization for players from removed teams
-        if current['team'] in remove_teams_var:
-            continue
-
-        valid_positions = position_rules[roster_pos]
-        better_options = []
-
-        # Find valid replacements for this roster position
-        for pos in valid_positions:
-            if pos in position_groups:
-                pos_options = [
-                    p for p in position_groups[pos]
-                    if p['median'] > current['median']
-                    and (total_salary - current['salary'] + p['salary']) <= salary_cap
-                    and p['player_names'] not in used_players
-                    and any(valid_pos in p['positions'] for valid_pos in valid_positions)
-                    and map_dict['team_map'].get(p['player_names']) not in remove_teams_var  # Check team restriction
-                ]
-                better_options.extend(pos_options)
-
-        if better_options:
-            # Remove duplicates
-            better_options = {opt['player_names']: opt for opt in better_options}.values()
-
-            # Sort by median projection and take the best one
-            best_replacement = max(better_options, key=lambda x: x['median'])
-
-            # Update the lineup and tracking variables
-            used_players.remove(current['name'])
-            used_players.add(best_replacement['player_names'])
-            total_salary = total_salary - current['salary'] + best_replacement['salary']
-            roster[roster_pos] = {
-                'name': best_replacement['player_names'],
-                'position': map_dict['pos_map'][best_replacement['player_names']].split('/'),
-                'team': map_dict['team_map'][best_replacement['player_names']],
-                'salary': best_replacement['salary'],
-                'median': best_replacement['median'],
-                'ownership': best_replacement['ownership']
-            }
-
-    # Return optimized lineup maintaining original column order
-    return [roster[pos]['name'] for pos in row.index if pos in roster]
global_func/predict_dupes.py DELETED
@@ -1,188 +0,0 @@
-import streamlit as st
-import numpy as np
-import pandas as pd
-import time
-from fuzzywuzzy import process
-
-def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var):
-    if strength_var == 'Weak':
-        dupes_multiplier = .75
-        percentile_multiplier = .90
-    elif strength_var == 'Average':
-        dupes_multiplier = 1.00
-        percentile_multiplier = 1.00
-    elif strength_var == 'Sharp':
-        dupes_multiplier = 1.25
-        percentile_multiplier = 1.10
-    max_ownership = max(maps_dict['own_map'].values()) / 100
-    average_ownership = np.mean(list(maps_dict['own_map'].values())) / 100
-    if site_var == 'Fanduel':
-        if type_var == 'Showdown':
-            dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank']
-            own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own']
-            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
-            flex_ownerships = pd.concat([
-                portfolio.iloc[:,1].map(maps_dict['own_map']),
-                portfolio.iloc[:,2].map(maps_dict['own_map']),
-                portfolio.iloc[:,3].map(maps_dict['own_map']),
-                portfolio.iloc[:,4].map(maps_dict['own_map'])
-            ])
-            flex_rank = flex_ownerships.rank(pct=True)
-
-            # Assign ranks back to individual columns using the same rank scale
-            portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
-            portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-
-            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
-            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
-            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
-            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
-            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100
-
-            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
-            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
-            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
-            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)
-
-            # Calculate dupes formula
-            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (60000 - portfolio['Own'])) / 100) - ((60000 - portfolio['salary']) / 100)
-            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier
-
-            # Round and handle negative values
-            portfolio['Dupes'] = np.where(
-                np.round(portfolio['dupes_calc'], 0) <= 0,
-                0,
-                np.round(portfolio['dupes_calc'], 0) - 1
-            )
-        if type_var == 'Classic':
-            num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
-            dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
-            own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
-            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
-            for i in range(1, num_players + 1):
-                portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
-                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
-
-            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
-            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
-            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
-            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)
-
-            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (60000 - portfolio['Own'])) / 100) - ((60000 - portfolio['salary']) / 100)
-            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier
-            # Round and handle negative values
-            portfolio['Dupes'] = np.where(
-                np.round(portfolio['dupes_calc'], 0) <= 0,
-                0,
-                np.round(portfolio['dupes_calc'], 0) - 1
-            )
-
-    elif site_var == 'Draftkings':
-        if type_var == 'Showdown':
-            dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
-            own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
-            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
-            flex_ownerships = pd.concat([
-                portfolio.iloc[:,1].map(maps_dict['own_map']),
-                portfolio.iloc[:,2].map(maps_dict['own_map']),
-                portfolio.iloc[:,3].map(maps_dict['own_map']),
-                portfolio.iloc[:,4].map(maps_dict['own_map']),
-                portfolio.iloc[:,5].map(maps_dict['own_map'])
-            ])
-            flex_rank = flex_ownerships.rank(pct=True)
-
-            # Assign ranks back to individual columns using the same rank scale
-            portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
-            portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-            portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
-
-            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
-            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
-            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
-            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
-            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100
-            portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']) / 100
-
-            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
-            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
-            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
-            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)
-
-            # Calculate dupes formula
-            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (50000 - portfolio['Own'])) / 100) - ((50000 - portfolio['salary']) / 100)
-            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier
-
-            # Round and handle negative values
-            portfolio['Dupes'] = np.where(
-                np.round(portfolio['dupes_calc'], 0) <= 0,
-                0,
-                np.round(portfolio['dupes_calc'], 0) - 1
-            )
-        if type_var == 'Classic':
-            num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
-            dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
-            own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
-            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
-            for i in range(1, num_players + 1):
-                portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
-                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
-
-            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
-            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
-            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
-            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)
-
-            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (50000 - portfolio['Own'])) / 100) - ((50000 - portfolio['salary']) / 100)
-            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier
-            # Round and handle negative values
-            portfolio['Dupes'] = np.where(
-                np.round(portfolio['dupes_calc'], 0) <= 0,
-                0,
-                np.round(portfolio['dupes_calc'], 0) - 1
-            )
-
-    portfolio['Dupes'] = np.round(portfolio['Dupes'], 0)
-    portfolio['own_ratio'] = np.where(
-        portfolio[own_columns].isin([max_ownership]).any(axis=1),
-        portfolio['own_sum'] / portfolio['own_average'],
-        (portfolio['own_sum'] - max_ownership) / portfolio['own_average']
-    )
-    percentile_cut_scalar = portfolio['median'].max()  # Get scalar value
-    if type_var == 'Classic':
-        own_ratio_nerf = 2
-    elif type_var == 'Showdown':
-        own_ratio_nerf = 1.5
-    portfolio['Finish_percentile'] = portfolio.apply(
-        lambda row: .0005 if (row['own_ratio'] - own_ratio_nerf) / ((10 * (row['median'] / percentile_cut_scalar)) / 2) < .0005
-        else (row['own_ratio'] - own_ratio_nerf) / ((10 * (row['median'] / percentile_cut_scalar)) / 2),
-        axis=1
-    )
-
-    portfolio['Ref_Proj'] = portfolio['median'].max()
-    portfolio['Max_Proj'] = portfolio['Ref_Proj'] + 10
-    portfolio['Min_Proj'] = portfolio['Ref_Proj'] - 10
-    portfolio['Avg_Ref'] = (portfolio['Max_Proj'] + portfolio['Min_Proj']) / 2
-    portfolio['Win%'] = (((portfolio['median'] / portfolio['Avg_Ref']) - (0.1 + ((portfolio['Ref_Proj'] - portfolio['median'])/100))) / (Contest_Size / 1000)) / 10
-    max_allowed_win = (1 / Contest_Size) * 5
-    portfolio['Win%'] = portfolio['Win%'] / portfolio['Win%'].max() * max_allowed_win
-
-    portfolio['Finish_percentile'] = portfolio['Finish_percentile'] + .005 + (.005 * (Contest_Size / 10000))
-    portfolio['Finish_percentile'] = portfolio['Finish_percentile'] * percentile_multiplier
-    portfolio['Win%'] = portfolio['Win%'] * (1 - portfolio['Finish_percentile'])
-
-    portfolio['low_own_count'] = portfolio[own_columns].apply(lambda row: (row < 0.10).sum(), axis=1)
-    portfolio['Finish_percentile'] = portfolio.apply(lambda row: row['Finish_percentile'] if row['low_own_count'] <= 0 else row['Finish_percentile'] / row['low_own_count'], axis=1)
-    portfolio['Lineup Edge'] = portfolio['Win%'] * ((.5 - portfolio['Finish_percentile']) * (Contest_Size / 2.5))
-    portfolio['Lineup Edge'] = portfolio.apply(lambda row: row['Lineup Edge'] / (row['Dupes'] + 1) if row['Dupes'] > 0 else row['Lineup Edge'], axis=1)
-    portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
-    portfolio = portfolio.drop(columns=dup_count_columns)
-    portfolio = portfolio.drop(columns=own_columns)
-    portfolio = portfolio.drop(columns=calc_columns)
-
-    return portfolio