James McCool committed on
Commit 2e3cd9d · 1 Parent(s): 46a28f1

Add stratification functionality and enhance portfolio handling in app.py


- Introduced a new stratification_function that selects lineups at evenly spaced target scores of a chosen metric, so users can generate a spread of lineups across the full range and improve lineup optimization.
- Updated app.py to integrate the new stratification feature, including user-interface elements for selecting the sorting criterion and the number of lineups to produce.
- Improved portfolio handling by storing the portfolio as an in-memory parquet buffer for better performance and memory efficiency (see the sketch after this list).
- Enhanced data processing by keeping the stack and size mappings from the portfolio in session state, improving the accuracy of lineup analysis.
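As a rough illustration of the parquet handling described above, here is a minimal, self-contained sketch of the same round-trip outside of Streamlit. It assumes pandas with a pyarrow (or fastparquet) backend installed; the portfolio DataFrame is a made-up stand-in, not the app's actual data.

import io
import pandas as pd

# Hypothetical stand-in for the uploaded portfolio.
portfolio = pd.DataFrame({
    'P1': ['Player A', 'Player C'],
    'P2': ['Player B', 'Player D'],
    'salary': [9800, 9600],
})

# Serialize to an in-memory parquet buffer (snappy compression),
# then keep only the compact bytes and drop the original frame.
buffer = io.BytesIO()
portfolio.to_parquet(buffer, compression='snappy')
origin_portfolio = buffer.getvalue()

# Rebuild a working copy only when it is actually needed.
working_frame = pd.read_parquet(io.BytesIO(origin_portfolio))
print(working_frame.equals(portfolio))  # should print True with a pyarrow backend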

Files changed (2)
  1. app.py +212 -199
  2. global_func/stratification_function.py +32 -0
app.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
from rapidfuzz import process
import random
from collections import Counter

## import global functions
from global_func.clean_player_name import clean_player_name
@@ -23,6 +24,7 @@ from global_func.hedging_preset import hedging_preset
from global_func.volatility_preset import volatility_preset
from global_func.reduce_volatility_preset import reduce_volatility_preset
from global_func.analyze_player_combos import analyze_player_combos

freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
stacking_sports = ['MLB', 'NHL', 'NFL']
@@ -128,6 +130,7 @@ with tab1:
else:
    stack_dict = None
if st.session_state['portfolio'] is not None:
    st.success('Portfolio file loaded successfully!')
    st.session_state['portfolio'] = st.session_state['portfolio'].apply(lambda x: x.replace(player_wrong_names_mlb, player_right_names_mlb))
    st.dataframe(st.session_state['portfolio'].head(10))
@@ -180,9 +183,10 @@ with tab1:

projections = projections.apply(lambda x: x.replace(player_wrong_names_mlb, player_right_names_mlb))
st.dataframe(projections.head(10))
-
if portfolio_file and projections_file:
    if st.session_state['portfolio'] is not None and projections is not None:
        st.subheader("Name Matching Analysis")
        # Initialize projections_df in session state if it doesn't exist
        # Get unique names from portfolio
@@ -281,15 +285,82 @@ with tab1:
).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row[2:]) else 0,
axis=1
)
- stack_dict = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Stack']))
- size_dict = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Size']))

- working_frame = st.session_state['portfolio'].copy()
try:
    st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Name'], st.session_state['csv_file']['Name + ID']))
except:
    st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Nickname'], st.session_state['csv_file']['Id']))
st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()

# with tab2:
# if st.button('Clear data', key='reset2'):
@@ -804,7 +875,7 @@ with tab1:
# )

with tab2:
- if 'portfolio' in st.session_state and 'projections_df' in st.session_state:
    with st.container():
        col1, col2 = st.columns(2)
        with col1:
@@ -828,70 +899,8 @@ with tab2:
828
 
829
  if 'working_frame' not in st.session_state:
830
  st.session_state['settings_base'] = True
831
- st.session_state['working_frame'] = st.session_state['origin_portfolio'].copy()
832
- if site_var == 'Draftkings':
833
- if type_var == 'Classic':
834
- if sport_var == 'CS2':
835
- st.session_state['map_dict'] = {
836
- 'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
837
- 'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
838
- 'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
839
- 'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
840
- 'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
841
- 'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
842
- 'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'] * 1.5)),
843
- 'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
844
- 'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
845
- }
846
- elif sport_var != 'CS2':
847
- st.session_state['map_dict'] = {
848
- 'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
849
- 'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
850
- 'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
851
- 'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
852
- 'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
853
- 'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
854
- 'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
855
- 'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
856
- 'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
857
- }
858
- elif type_var == 'Showdown':
859
- if sport_var == 'GOLF':
860
- st.session_state['map_dict'] = {
861
- 'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
862
- 'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
863
- 'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
864
- 'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
865
- 'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
866
- 'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
867
- 'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
868
- 'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
869
- 'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership']))
870
- }
871
- if sport_var != 'GOLF':
872
- st.session_state['map_dict'] = {
873
- 'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
874
- 'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
875
- 'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
876
- 'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
877
- 'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
878
- 'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
879
- 'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'] * 1.5)),
880
- 'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
881
- 'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
882
- }
883
- elif site_var == 'Fanduel':
884
- st.session_state['map_dict'] = {
885
- 'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
886
- 'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
887
- 'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
888
- 'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
889
- 'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
890
- 'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
891
- 'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
892
- 'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
893
- 'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
894
- }
895
  if type_var == 'Classic':
896
  if sport_var == 'CS2':
897
  # Calculate salary (CPT uses cpt_salary_map, others use salary_map)
@@ -919,9 +928,9 @@ with tab2:
st.session_state['working_frame']['salary'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row), axis=1)
st.session_state['working_frame']['median'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row), axis=1)
st.session_state['working_frame']['Own'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row), axis=1)
- if stack_dict is not None:
-     st.session_state['working_frame']['Stack'] = st.session_state['working_frame'].index.map(stack_dict)
-     st.session_state['working_frame']['Size'] = st.session_state['working_frame'].index.map(size_dict)
elif type_var == 'Showdown':
    # Calculate salary (CPT uses cpt_salary_map, others use salary_map)
    st.session_state['working_frame']['salary'] = st.session_state['working_frame'].apply(
@@ -943,20 +952,14 @@ with tab2:
sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
axis=1
)
st.session_state['base_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
st.session_state['working_frame'] = st.session_state['base_frame'].copy()
# st.session_state['highest_owned_teams'] = st.session_state['projections_df'][~st.session_state['projections_df']['position'].isin(['P', 'SP'])].groupby('team')['ownership'].sum().sort_values(ascending=False).head(3).index.tolist()
# st.session_state['highest_owned_pitchers'] = st.session_state['projections_df'][st.session_state['projections_df']['position'].isin(['P', 'SP'])]['player_names'].sort_values(by='ownership', ascending=False).head(3).tolist()
- if 'info_columns_dict' not in st.session_state:
-     st.session_state['info_columns_dict'] = {
-         'Dupes': st.session_state['working_frame']['Dupes'],
-         'Finish_percentile': st.session_state['working_frame']['Finish_percentile'],
-         'Win%': st.session_state['working_frame']['Win%'],
-         'Lineup Edge': st.session_state['working_frame']['Lineup Edge'],
-         'Weighted Own': st.session_state['working_frame']['Weighted Own'],
-         'Geomean': st.session_state['working_frame']['Geomean'],
-         'Diversity': st.session_state['working_frame']['Diversity']
-     }

if 'trimming_dict_maxes' not in st.session_state:
    st.session_state['trimming_dict_maxes'] = {
@@ -987,10 +990,10 @@ with tab2:
min_lineup_edge = st.number_input("Min acceptable Lineup Edge?", value=-.5, min_value=-1.00, step=.001)
if sport_var in ['NFL', 'MLB', 'NHL']:
    stack_include_toggle = st.selectbox("Include specific stacks?", options=['All Stacks', 'Specific Stacks'], index=0)
-     stack_selections = st.multiselect("If Specific Stacks, Which to include?", options=sorted(list(set(stack_dict.values()))), default=[])

    stack_remove_toggle = st.selectbox("Remove specific stacks?", options=['No', 'Yes'], index=0)
-     stack_remove = st.multiselect("If Specific Stacks, Which to remove?", options=sorted(list(set(stack_dict.values()))), default=[])

submitted = st.form_submit_button("Submit")
@@ -1096,14 +1099,14 @@ with tab2:
with min_sort:
    performance_threshold_low = st.number_input("Min", value=0.0, min_value=0.0, step=1.0, key='min_sort')
with max_sort:
-     performance_threshold_high = st.number_input("Max", value=st.session_state['trimming_dict_maxes'][performance_type], min_value=0.0, step=1.0, key='max_sort')

st.write("Trimming threshold range:")
min_trim, max_trim = st.columns(2)
with min_trim:
    own_threshold_low = st.number_input("Min", value=0.0, min_value=0.0, step=1.0, key='min_trim')
with max_trim:
-     own_threshold_high = st.number_input("Max", value=st.session_state['trimming_dict_maxes'][own_type], min_value=0.0, step=1.0, key='max_trim')

submitted = st.form_submit_button("Trim")
if submitted:
@@ -1134,7 +1137,16 @@ with tab2:
parsed_frame = reduce_volatility_preset(st.session_state['working_frame'], lineup_target, excluded_cols, sport_var)
st.session_state['working_frame'] = parsed_frame.reset_index(drop=True)
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
-
with st.container():
    if 'export_base' not in st.session_state:
        st.session_state['export_base'] = pd.DataFrame(columns=st.session_state['working_frame'].columns)
@@ -1180,7 +1192,7 @@ with tab2:
display_frame = st.session_state['export_base']

total_rows = len(display_frame)
- rows_per_page = 500
total_pages = (total_rows + rows_per_page - 1) // rows_per_page  # Ceiling division

# Initialize page number in session state if not exists
@@ -1243,49 +1255,14 @@ with tab2:
1243
  )
1244
  player_stats_col, stack_stats_col, combos_col = st.tabs(['Player Stats', 'Stack Stats', 'Combos'])
1245
  with player_stats_col:
1246
-
1247
- player_stats = []
1248
- player_columns = [col for col in display_frame.columns if col not in excluded_cols]
1249
-
1250
- if st.session_state['settings_base'] and 'origin_player_exposures' in st.session_state and display_frame_source == 'Portfolio':
1251
- st.session_state['player_summary'] = st.session_state['origin_player_exposures']
1252
- else:
1253
- if type_var == 'Showdown':
1254
- for player in player_names:
1255
- # Create mask for lineups where this player is Captain (first column)
1256
- cpt_mask = display_frame[player_columns[0]] == player
1257
-
1258
- if cpt_mask.any():
1259
- player_stats.append({
1260
- 'Player': f"{player} (CPT)",
1261
- 'Lineup Count': cpt_mask.sum(),
1262
- 'Exposure': cpt_mask.sum() / len(display_frame),
1263
- 'Avg Median': display_frame[cpt_mask]['median'].mean(),
1264
- 'Avg Own': display_frame[cpt_mask]['Own'].mean(),
1265
- 'Avg Dupes': display_frame[cpt_mask]['Dupes'].mean(),
1266
- 'Avg Finish %': display_frame[cpt_mask]['Finish_percentile'].mean(),
1267
- 'Avg Lineup Edge': display_frame[cpt_mask]['Lineup Edge'].mean(),
1268
- })
1269
-
1270
- # Create mask for lineups where this player is FLEX (other columns)
1271
- flex_mask = display_frame[player_columns[1:]].apply(
1272
- lambda row: player in list(row), axis=1
1273
- )
1274
-
1275
- if flex_mask.any():
1276
- player_stats.append({
1277
- 'Player': f"{player} (FLEX)",
1278
- 'Lineup Count': flex_mask.sum(),
1279
- 'Exposure': flex_mask.sum() / len(display_frame),
1280
- 'Avg Median': display_frame[flex_mask]['median'].mean(),
1281
- 'Avg Own': display_frame[flex_mask]['Own'].mean(),
1282
- 'Avg Dupes': display_frame[flex_mask]['Dupes'].mean(),
1283
- 'Avg Finish %': display_frame[flex_mask]['Finish_percentile'].mean(),
1284
- 'Avg Lineup Edge': display_frame[flex_mask]['Lineup Edge'].mean(),
1285
- })
1286
  else:
1287
- if sport_var == 'CS2':
1288
- # Handle Captain positions
1289
  for player in player_names:
1290
  # Create mask for lineups where this player is Captain (first column)
1291
  cpt_mask = display_frame[player_columns[0]] == player
@@ -1318,77 +1295,69 @@ with tab2:
1318
  'Avg Finish %': display_frame[flex_mask]['Finish_percentile'].mean(),
1319
  'Avg Lineup Edge': display_frame[flex_mask]['Lineup Edge'].mean(),
1320
  })
1321
- elif sport_var != 'CS2':
1322
- # Original Classic format processing
1323
- for player in player_names:
1324
- player_mask = display_frame[player_columns].apply(
1325
- lambda row: player in list(row), axis=1
1326
- )
1327
-
1328
- if player_mask.any():
1329
- player_stats.append({
1330
- 'Player': player,
1331
- 'Lineup Count': player_mask.sum(),
1332
- 'Exposure': player_mask.sum() / len(display_frame),
1333
- 'Avg Median': display_frame[player_mask]['median'].mean(),
1334
- 'Avg Own': display_frame[player_mask]['Own'].mean(),
1335
- 'Avg Dupes': display_frame[player_mask]['Dupes'].mean(),
1336
- 'Avg Finish %': display_frame[player_mask]['Finish_percentile'].mean(),
1337
- 'Avg Lineup Edge': display_frame[player_mask]['Lineup Edge'].mean(),
1338
- })
1339
 
1340
- player_summary = pd.DataFrame(player_stats)
1341
- player_summary = player_summary.sort_values('Lineup Count', ascending=False)
1342
- st.session_state['player_summary'] = player_summary.copy()
1343
- if 'origin_player_exposures' not in st.session_state:
1344
- st.session_state['origin_player_exposures'] = player_summary.copy()
1345
-
1346
- st.subheader("Player Summary")
1347
- st.dataframe(
1348
- st.session_state['player_summary'].style
1349
- .background_gradient(axis=0).background_gradient(cmap='RdYlGn').background_gradient(cmap='RdYlGn_r', subset=['Avg Finish %', 'Avg Own', 'Avg Dupes'])
1350
- .format({
1351
- 'Avg Median': '{:.2f}',
1352
- 'Avg Own': '{:.2f}',
1353
- 'Avg Dupes': '{:.2f}',
1354
- 'Avg Finish %': '{:.2%}',
1355
- 'Avg Lineup Edge': '{:.2%}',
1356
- 'Exposure': '{:.2%}'
1357
- }),
1358
- height=400,
1359
- use_container_width=True
1360
- )
1361
-
1362
- with stack_stats_col:
1363
- if 'Stack' in display_frame.columns:
1364
- stack_stats = []
1365
- stack_columns = [col for col in display_frame.columns if col.startswith('Stack')]
1366
-
1367
- if st.session_state['settings_base'] and 'origin_stack_exposures' in st.session_state and display_frame_source == 'Portfolio':
1368
- st.session_state['stack_summary'] = st.session_state['origin_stack_exposures']
1369
- else:
1370
- for stack in stack_dict.values():
1371
- stack_mask = display_frame['Stack'] == stack
1372
- if stack_mask.any():
1373
- stack_stats.append({
1374
- 'Stack': stack,
1375
- 'Lineup Count': stack_mask.sum(),
1376
- 'Exposure': stack_mask.sum() / len(display_frame),
1377
- 'Avg Median': display_frame[stack_mask]['median'].mean(),
1378
- 'Avg Own': display_frame[stack_mask]['Own'].mean(),
1379
- 'Avg Dupes': display_frame[stack_mask]['Dupes'].mean(),
1380
- 'Avg Finish %': display_frame[stack_mask]['Finish_percentile'].mean(),
1381
- 'Avg Lineup Edge': display_frame[stack_mask]['Lineup Edge'].mean(),
1382
- })
1383
- stack_summary = pd.DataFrame(stack_stats)
1384
- stack_summary = stack_summary.sort_values('Lineup Count', ascending=False).drop_duplicates()
1385
- st.session_state['stack_summary'] = stack_summary.copy()
1386
- if 'origin_stack_exposures' not in st.session_state:
1387
- st.session_state['origin_stack_exposures'] = stack_summary.copy()
1388
-
1389
- st.subheader("Stack Summary")
1390
  st.dataframe(
1391
- st.session_state['stack_summary'].style
1392
  .background_gradient(axis=0).background_gradient(cmap='RdYlGn').background_gradient(cmap='RdYlGn_r', subset=['Avg Finish %', 'Avg Own', 'Avg Dupes'])
1393
  .format({
1394
  'Avg Median': '{:.2f}',
@@ -1401,6 +1370,50 @@ with tab2:
1401
  height=400,
1402
  use_container_width=True
1403
)
1404
  else:
1405
  stack_summary = pd.DataFrame(columns=['Stack', 'Lineup Count', 'Avg Median', 'Avg Own', 'Avg Dupes', 'Avg Finish %', 'Avg Lineup Edge'])
1406
 
 
from rapidfuzz import process
import random
from collections import Counter
+ import io

## import global functions
from global_func.clean_player_name import clean_player_name
 
from global_func.volatility_preset import volatility_preset
from global_func.reduce_volatility_preset import reduce_volatility_preset
from global_func.analyze_player_combos import analyze_player_combos
+ from global_func.stratification_function import stratification_function

freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
stacking_sports = ['MLB', 'NHL', 'NFL']
 
else:
    stack_dict = None
if st.session_state['portfolio'] is not None:
+
    st.success('Portfolio file loaded successfully!')
    st.session_state['portfolio'] = st.session_state['portfolio'].apply(lambda x: x.replace(player_wrong_names_mlb, player_right_names_mlb))
    st.dataframe(st.session_state['portfolio'].head(10))
 
projections = projections.apply(lambda x: x.replace(player_wrong_names_mlb, player_right_names_mlb))
st.dataframe(projections.head(10))
+
if portfolio_file and projections_file:
    if st.session_state['portfolio'] is not None and projections is not None:
+
        st.subheader("Name Matching Analysis")
        # Initialize projections_df in session state if it doesn't exist
        # Get unique names from portfolio
 
).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row[2:]) else 0,
axis=1
)
+ st.session_state['stack_dict'] = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Stack']))
+ st.session_state['size_dict'] = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Size']))

try:
    st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Name'], st.session_state['csv_file']['Name + ID']))
except:
    st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Nickname'], st.session_state['csv_file']['Id']))
295
+ if 'map_dict' not in st.session_state:
296
+ if site_var == 'Draftkings':
297
+ if type_var == 'Classic':
298
+ if sport_var == 'CS2':
299
+ st.session_state['map_dict'] = {
300
+ 'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
301
+ 'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
302
+ 'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
303
+ 'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
304
+ 'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
305
+ 'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
306
+ 'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'] * 1.5)),
307
+ 'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
308
+ 'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
309
+ }
310
+ elif sport_var != 'CS2':
311
+ st.session_state['map_dict'] = {
312
+ 'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
313
+ 'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
314
+ 'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
315
+ 'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
316
+ 'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
317
+ 'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
318
+ 'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
319
+ 'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
320
+ 'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
321
+ }
322
+ elif type_var == 'Showdown':
323
+ if sport_var == 'GOLF':
324
+ st.session_state['map_dict'] = {
325
+ 'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
326
+ 'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
327
+ 'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
328
+ 'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
329
+ 'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
330
+ 'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
331
+ 'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
332
+ 'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
333
+ 'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership']))
334
+ }
335
+ if sport_var != 'GOLF':
336
+ st.session_state['map_dict'] = {
337
+ 'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
338
+ 'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
339
+ 'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
340
+ 'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
341
+ 'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
342
+ 'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
343
+ 'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'] * 1.5)),
344
+ 'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
345
+ 'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
346
+ }
347
+ elif site_var == 'Fanduel':
348
+ st.session_state['map_dict'] = {
349
+ 'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
350
+ 'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
351
+ 'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
352
+ 'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
353
+ 'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
354
+ 'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
355
+ 'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
356
+ 'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
357
+ 'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
358
+ }
st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()
+ buffer = io.BytesIO()
+ st.session_state['portfolio'].to_parquet(buffer, compression='snappy')
+ st.session_state['origin_portfolio'] = buffer.getvalue()
+ del st.session_state['portfolio']

# with tab2:
# if st.button('Clear data', key='reset2'):
 
# )

with tab2:
+ if 'origin_portfolio' in st.session_state and 'projections_df' in st.session_state:
    with st.container():
        col1, col2 = st.columns(2)
        with col1:
 
if 'working_frame' not in st.session_state:
    st.session_state['settings_base'] = True
+     st.session_state['working_frame'] = pd.read_parquet(io.BytesIO(st.session_state['origin_portfolio']))
+
    if type_var == 'Classic':
        if sport_var == 'CS2':
            # Calculate salary (CPT uses cpt_salary_map, others use salary_map)
 
st.session_state['working_frame']['salary'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row), axis=1)
st.session_state['working_frame']['median'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row), axis=1)
st.session_state['working_frame']['Own'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row), axis=1)
+ if 'stack_dict' in st.session_state:
+     st.session_state['working_frame']['Stack'] = st.session_state['working_frame'].index.map(st.session_state['stack_dict'])
+     st.session_state['working_frame']['Size'] = st.session_state['working_frame'].index.map(st.session_state['size_dict'])
elif type_var == 'Showdown':
    # Calculate salary (CPT uses cpt_salary_map, others use salary_map)
    st.session_state['working_frame']['salary'] = st.session_state['working_frame'].apply(
 
sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
axis=1
)
+ st.session_state['working_frame']['Own'] = st.session_state['working_frame']['Own'].astype('float32')
+ st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
+ st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
+
st.session_state['base_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
st.session_state['working_frame'] = st.session_state['base_frame'].copy()
# st.session_state['highest_owned_teams'] = st.session_state['projections_df'][~st.session_state['projections_df']['position'].isin(['P', 'SP'])].groupby('team')['ownership'].sum().sort_values(ascending=False).head(3).index.tolist()
# st.session_state['highest_owned_pitchers'] = st.session_state['projections_df'][st.session_state['projections_df']['position'].isin(['P', 'SP'])]['player_names'].sort_values(by='ownership', ascending=False).head(3).tolist()

if 'trimming_dict_maxes' not in st.session_state:
    st.session_state['trimming_dict_maxes'] = {
 
min_lineup_edge = st.number_input("Min acceptable Lineup Edge?", value=-.5, min_value=-1.00, step=.001)
if sport_var in ['NFL', 'MLB', 'NHL']:
    stack_include_toggle = st.selectbox("Include specific stacks?", options=['All Stacks', 'Specific Stacks'], index=0)
+     stack_selections = st.multiselect("If Specific Stacks, Which to include?", options=sorted(list(set(st.session_state['stack_dict'].values()))), default=[])

    stack_remove_toggle = st.selectbox("Remove specific stacks?", options=['No', 'Yes'], index=0)
+     stack_remove = st.multiselect("If Specific Stacks, Which to remove?", options=sorted(list(set(st.session_state['stack_dict'].values()))), default=[])

submitted = st.form_submit_button("Submit")
 
 
with min_sort:
    performance_threshold_low = st.number_input("Min", value=0.0, min_value=0.0, step=1.0, key='min_sort')
with max_sort:
+     performance_threshold_high = st.number_input("Max", value=float(st.session_state['trimming_dict_maxes'][performance_type]), min_value=0.0, step=1.0, key='max_sort')

st.write("Trimming threshold range:")
min_trim, max_trim = st.columns(2)
with min_trim:
    own_threshold_low = st.number_input("Min", value=0.0, min_value=0.0, step=1.0, key='min_trim')
with max_trim:
+     own_threshold_high = st.number_input("Max", value=float(st.session_state['trimming_dict_maxes'][own_type]), min_value=0.0, step=1.0, key='max_trim')

submitted = st.form_submit_button("Trim")
if submitted:
 
parsed_frame = reduce_volatility_preset(st.session_state['working_frame'], lineup_target, excluded_cols, sport_var)
st.session_state['working_frame'] = parsed_frame.reset_index(drop=True)
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
+ with st.expander('Stratify'):
+     with st.form(key='Stratification'):
+         sorting_choice = st.selectbox("Stat Choice", options=['median', 'Own', 'Weighted Own', 'Geomean', 'Lineup Edge', 'Finish_percentile', 'Diversity'], index=0)
+         lineup_target = st.number_input("Lineups to produce", value=150, min_value=1, step=1)
+         submitted = st.form_submit_button("Submit")
+         if submitted:
+             st.session_state['settings_base'] = False
+             parsed_frame = stratification_function(st.session_state['working_frame'], lineup_target, excluded_cols, sport_var, sorting_choice)
+             st.session_state['working_frame'] = parsed_frame.reset_index(drop=True)
+             st.session_state['export_merge'] = st.session_state['working_frame'].copy()
with st.container():
    if 'export_base' not in st.session_state:
        st.session_state['export_base'] = pd.DataFrame(columns=st.session_state['working_frame'].columns)
 
display_frame = st.session_state['export_base']

total_rows = len(display_frame)
+ rows_per_page = 100
total_pages = (total_rows + rows_per_page - 1) // rows_per_page  # Ceiling division

# Initialize page number in session state if not exists
 
1255
  )
1256
  player_stats_col, stack_stats_col, combos_col = st.tabs(['Player Stats', 'Stack Stats', 'Combos'])
1257
  with player_stats_col:
1258
+ if st.button("Analyze Players", key='analyze_players'):
1259
+ player_stats = []
1260
+ player_columns = [col for col in display_frame.columns if col not in excluded_cols]
1261
+
1262
+ if st.session_state['settings_base'] and 'origin_player_exposures' in st.session_state and display_frame_source == 'Portfolio':
1263
+ st.session_state['player_summary'] = st.session_state['origin_player_exposures']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1264
  else:
1265
+ if type_var == 'Showdown':
 
1266
  for player in player_names:
1267
  # Create mask for lineups where this player is Captain (first column)
1268
  cpt_mask = display_frame[player_columns[0]] == player
 
1295
  'Avg Finish %': display_frame[flex_mask]['Finish_percentile'].mean(),
1296
  'Avg Lineup Edge': display_frame[flex_mask]['Lineup Edge'].mean(),
1297
  })
1298
+ else:
1299
+ if sport_var == 'CS2':
1300
+ # Handle Captain positions
1301
+ for player in player_names:
1302
+ # Create mask for lineups where this player is Captain (first column)
1303
+ cpt_mask = display_frame[player_columns[0]] == player
1304
+
1305
+ if cpt_mask.any():
1306
+ player_stats.append({
1307
+ 'Player': f"{player} (CPT)",
1308
+ 'Lineup Count': cpt_mask.sum(),
1309
+ 'Exposure': cpt_mask.sum() / len(display_frame),
1310
+ 'Avg Median': display_frame[cpt_mask]['median'].mean(),
1311
+ 'Avg Own': display_frame[cpt_mask]['Own'].mean(),
1312
+ 'Avg Dupes': display_frame[cpt_mask]['Dupes'].mean(),
1313
+ 'Avg Finish %': display_frame[cpt_mask]['Finish_percentile'].mean(),
1314
+ 'Avg Lineup Edge': display_frame[cpt_mask]['Lineup Edge'].mean(),
1315
+ })
1316
+
1317
+ # Create mask for lineups where this player is FLEX (other columns)
1318
+ flex_mask = display_frame[player_columns[1:]].apply(
1319
+ lambda row: player in list(row), axis=1
1320
+ )
1321
+
1322
+ if flex_mask.any():
1323
+ player_stats.append({
1324
+ 'Player': f"{player} (FLEX)",
1325
+ 'Lineup Count': flex_mask.sum(),
1326
+ 'Exposure': flex_mask.sum() / len(display_frame),
1327
+ 'Avg Median': display_frame[flex_mask]['median'].mean(),
1328
+ 'Avg Own': display_frame[flex_mask]['Own'].mean(),
1329
+ 'Avg Dupes': display_frame[flex_mask]['Dupes'].mean(),
1330
+ 'Avg Finish %': display_frame[flex_mask]['Finish_percentile'].mean(),
1331
+ 'Avg Lineup Edge': display_frame[flex_mask]['Lineup Edge'].mean(),
1332
+ })
1333
+ elif sport_var != 'CS2':
1334
+ # Original Classic format processing
1335
+ for player in player_names:
1336
+ player_mask = display_frame[player_columns].apply(
1337
+ lambda row: player in list(row), axis=1
1338
+ )
1339
+
1340
+ if player_mask.any():
1341
+ player_stats.append({
1342
+ 'Player': player,
1343
+ 'Lineup Count': player_mask.sum(),
1344
+ 'Exposure': player_mask.sum() / len(display_frame),
1345
+ 'Avg Median': display_frame[player_mask]['median'].mean(),
1346
+ 'Avg Own': display_frame[player_mask]['Own'].mean(),
1347
+ 'Avg Dupes': display_frame[player_mask]['Dupes'].mean(),
1348
+ 'Avg Finish %': display_frame[player_mask]['Finish_percentile'].mean(),
1349
+ 'Avg Lineup Edge': display_frame[player_mask]['Lineup Edge'].mean(),
1350
+ })
1351
+
1352
+ player_summary = pd.DataFrame(player_stats)
1353
+ player_summary = player_summary.sort_values('Lineup Count', ascending=False)
1354
+ st.session_state['player_summary'] = player_summary.copy()
1355
+ if 'origin_player_exposures' not in st.session_state:
1356
+ st.session_state['origin_player_exposures'] = player_summary.copy()
1357
 
1358
+ st.subheader("Player Summary")
1359
  st.dataframe(
1360
+ st.session_state['player_summary'].style
1361
  .background_gradient(axis=0).background_gradient(cmap='RdYlGn').background_gradient(cmap='RdYlGn_r', subset=['Avg Finish %', 'Avg Own', 'Avg Dupes'])
1362
  .format({
1363
  'Avg Median': '{:.2f}',
 
1370
  height=400,
1371
  use_container_width=True
1372
  )
1373
+
1374
+ with stack_stats_col:
1375
+ if 'Stack' in display_frame.columns:
1376
+ if st.button("Analyze Stacks", key='analyze_stacks'):
1377
+ stack_stats = []
1378
+ stack_columns = [col for col in display_frame.columns if col.startswith('Stack')]
1379
+
1380
+ if st.session_state['settings_base'] and 'origin_stack_exposures' in st.session_state and display_frame_source == 'Portfolio':
1381
+ st.session_state['stack_summary'] = st.session_state['origin_stack_exposures']
1382
+ else:
1383
+ for stack in st.session_state['stack_dict'].values():
1384
+ stack_mask = display_frame['Stack'] == stack
1385
+ if stack_mask.any():
1386
+ stack_stats.append({
1387
+ 'Stack': stack,
1388
+ 'Lineup Count': stack_mask.sum(),
1389
+ 'Exposure': stack_mask.sum() / len(display_frame),
1390
+ 'Avg Median': display_frame[stack_mask]['median'].mean(),
1391
+ 'Avg Own': display_frame[stack_mask]['Own'].mean(),
1392
+ 'Avg Dupes': display_frame[stack_mask]['Dupes'].mean(),
1393
+ 'Avg Finish %': display_frame[stack_mask]['Finish_percentile'].mean(),
1394
+ 'Avg Lineup Edge': display_frame[stack_mask]['Lineup Edge'].mean(),
1395
+ })
1396
+ stack_summary = pd.DataFrame(stack_stats)
1397
+ stack_summary = stack_summary.sort_values('Lineup Count', ascending=False).drop_duplicates()
1398
+ st.session_state['stack_summary'] = stack_summary.copy()
1399
+ if 'origin_stack_exposures' not in st.session_state:
1400
+ st.session_state['origin_stack_exposures'] = stack_summary.copy()
1401
+
1402
+ st.subheader("Stack Summary")
1403
+ st.dataframe(
1404
+ st.session_state['stack_summary'].style
1405
+ .background_gradient(axis=0).background_gradient(cmap='RdYlGn').background_gradient(cmap='RdYlGn_r', subset=['Avg Finish %', 'Avg Own', 'Avg Dupes'])
1406
+ .format({
1407
+ 'Avg Median': '{:.2f}',
1408
+ 'Avg Own': '{:.2f}',
1409
+ 'Avg Dupes': '{:.2f}',
1410
+ 'Avg Finish %': '{:.2%}',
1411
+ 'Avg Lineup Edge': '{:.2%}',
1412
+ 'Exposure': '{:.2%}'
1413
+ }),
1414
+ height=400,
1415
+ use_container_width=True
1416
+ )
1417
  else:
1418
  stack_summary = pd.DataFrame(columns=['Stack', 'Lineup Count', 'Avg Median', 'Avg Own', 'Avg Dupes', 'Avg Finish %', 'Avg Lineup Edge'])
1419
 
global_func/stratification_function.py ADDED
@@ -0,0 +1,32 @@
+ import pandas as pd
+ import numpy as np
+
+ def stratification_function(portfolio: pd.DataFrame, lineup_target: int, exclude_cols: list, sport: str, sorting_choice: str):
+     excluded_cols = ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Stack', 'Size', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
+     player_columns = [col for col in portfolio.columns if col not in excluded_cols]
+
+     concat_portfolio = portfolio.copy()
+     if sorting_choice == 'Finish_percentile':
+         concat_portfolio = concat_portfolio.sort_values(by=sorting_choice, ascending=True).reset_index(drop=True)
+     else:
+         concat_portfolio = concat_portfolio.sort_values(by=sorting_choice, ascending=False).reset_index(drop=True)
+
+     # Calculate target similarity scores for linear progression
+     similarity_floor = concat_portfolio[sorting_choice].min()
+     similarity_ceiling = concat_portfolio[sorting_choice].max()
+
+     # Create evenly spaced target similarity scores
+     target_similarities = np.linspace(similarity_floor, similarity_ceiling, lineup_target)
+
+     # Find the closest lineup to each target similarity score
+     selected_indices = []
+     for target_sim in target_similarities:
+         # Find the index of the closest similarity score
+         closest_idx = (concat_portfolio[sorting_choice] - target_sim).abs().idxmin()
+         if closest_idx not in selected_indices:  # Avoid duplicates
+             selected_indices.append(closest_idx)
+
+     # Select the lineups
+     concat_portfolio = concat_portfolio.loc[selected_indices].reset_index(drop=True)
+
+     return concat_portfolio.sort_values(by=sorting_choice, ascending=False)
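For reference, a rough usage sketch of the function as the new Stratify form in app.py calls it. The working frame below is made up, and note that the exclude_cols and sport arguments are accepted but not currently used inside the function.

import pandas as pd
from global_func.stratification_function import stratification_function

# Hypothetical working frame: two player columns plus a couple of metric columns.
working_frame = pd.DataFrame({
    'P1': ['A', 'B', 'C', 'D', 'E'],
    'P2': ['F', 'G', 'H', 'I', 'J'],
    'median': [101.2, 99.8, 98.5, 97.1, 95.0],
    'Own': [120.0, 110.0, 105.0, 90.0, 80.0],
})

# Request 3 lineups spread evenly across the 'median' range (95.0 to 101.2);
# the function keeps the row whose 'median' is closest to each evenly spaced target.
subset = stratification_function(working_frame, 3, [], 'MLB', 'median')
print(subset[['P1', 'P2', 'median']])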