James McCool committed on
Commit
68208b0
·
1 Parent(s): 9cec9e7

Add player combination analysis feature in app.py and implement analyze_player_combos function. This update introduces a new tab for analyzing player combinations, allowing users to select combo sizes and view statistics on the most common player pairings or triplets, enhancing the analytical capabilities of the application.

Browse files
Files changed (2) hide show
  1. app.py +37 -2
  2. global_func/analyze_player_combos.py +92 -0
app.py CHANGED
@@ -22,6 +22,7 @@ from global_func.large_field_preset import large_field_preset
22
  from global_func.hedging_preset import hedging_preset
23
  from global_func.volatility_preset import volatility_preset
24
  from global_func.reduce_volatility_preset import reduce_volatility_preset
 
25
 
26
  freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
27
  stacking_sports = ['MLB', 'NHL', 'NFL']
@@ -1240,7 +1241,7 @@ with tab2:
1240
  use_container_width=True,
1241
  hide_index=True
1242
  )
1243
- player_stats_col, stack_stats_col = st.tabs(['Player Stats', 'Stack Stats'])
1244
  with player_stats_col:
1245
 
1246
  player_stats = []
@@ -1401,4 +1402,38 @@ with tab2:
1401
  use_container_width=True
1402
  )
1403
  else:
1404
- stack_summary = pd.DataFrame(columns=['Stack', 'Lineup Count', 'Avg Median', 'Avg Own', 'Avg Dupes', 'Avg Finish %', 'Avg Lineup Edge'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  from global_func.hedging_preset import hedging_preset
23
  from global_func.volatility_preset import volatility_preset
24
  from global_func.reduce_volatility_preset import reduce_volatility_preset
25
+ from global_func.analyze_player_combos import analyze_player_combos
26
 
27
  freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
28
  stacking_sports = ['MLB', 'NHL', 'NFL']
 
1241
  use_container_width=True,
1242
  hide_index=True
1243
  )
1244
+ player_stats_col, stack_stats_col, combos_col = st.tabs(['Player Stats', 'Stack Stats', 'Combos'])
1245
  with player_stats_col:
1246
 
1247
  player_stats = []
 
1402
  use_container_width=True
1403
  )
1404
  else:
1405
+ stack_summary = pd.DataFrame(columns=['Stack', 'Lineup Count', 'Avg Median', 'Avg Own', 'Avg Dupes', 'Avg Finish %', 'Avg Lineup Edge'])
1406
+
1407
+ with combos_col:
1408
+ st.subheader("Player Combinations")
1409
+
1410
+ # Add controls for combo analysis
1411
+ col1, col2 = st.columns(2)
1412
+ with col1:
1413
+ combo_size = st.selectbox("Combo Size", [2, 3], key='combo_size')
1414
+ with col2:
1415
+ if st.button("Analyze Combos", key='analyze_combos'):
1416
+ st.session_state['combo_analysis'] = analyze_player_combos(
1417
+ display_frame, excluded_cols, combo_size
1418
+ )
1419
+
1420
+ # Display results
1421
+ if 'combo_analysis' in st.session_state:
1422
+ st.dataframe(
1423
+ st.session_state['combo_analysis'].style
1424
+ .background_gradient(axis=0)
1425
+ .background_gradient(cmap='RdYlGn')
1426
+ .background_gradient(cmap='RdYlGn_r', subset=['Avg Finish %', 'Avg Own', 'Avg Dupes'])
1427
+ .format({
1428
+ 'Avg Median': '{:.2f}',
1429
+ 'Avg Own': '{:.2f}',
1430
+ 'Avg Dupes': '{:.2f}',
1431
+ 'Avg Finish %': '{:.2%}',
1432
+ 'Avg Lineup Edge': '{:.2%}',
1433
+ 'Exposure': '{:.2%}'
1434
+ }),
1435
+ height=400,
1436
+ use_container_width=True
1437
+ )
1438
+ else:
1439
+ st.info("Click 'Analyze Combos' to see the most common player combinations.")
global_func/analyze_player_combos.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from collections import Counter
4
+
5
def analyze_player_combinations(display_frame, excluded_cols, combo_size=2, top_n=50):
    """
    Summarize the most common player combinations across lineups.

    Each row of ``display_frame`` is treated as one lineup. Every column not
    listed in ``excluded_cols`` is treated as a player slot. Players are
    de-duplicated per lineup, so a name appearing in two slots counts once.

    Args:
        display_frame: DataFrame containing lineup data. Besides the player
            columns it must contain 'median', 'Own', 'Dupes',
            'Finish_percentile' and 'Lineup Edge', which are averaged over
            the lineups containing each combo.
        excluded_cols: Column names that are not player slots.
        combo_size: Number of players per combination. Any size >= 1 is
            accepted; values below 1 produce an empty result.
        top_n: Maximum number of combos to report, most frequent first.
            ``None`` reports every combo.

    Returns:
        DataFrame with columns 'Combo', 'Lineup Count', 'Exposure',
        'Avg Median', 'Avg Own', 'Avg Dupes', 'Avg Finish %' and
        'Avg Lineup Edge'. The columns are present even when no combo is
        found, so callers can style or format the result unconditionally.

    Raises:
        KeyError: If a required stat column is missing and at least one
            combo was found.
    """
    # Imported locally because the module's top-level imports do not
    # include itertools.
    from itertools import combinations

    # Output label -> source column averaged over lineups holding the combo.
    stat_sources = {
        'Avg Median': 'median',
        'Avg Own': 'Own',
        'Avg Dupes': 'Dupes',
        'Avg Finish %': 'Finish_percentile',
        'Avg Lineup Edge': 'Lineup Edge',
    }
    output_columns = ['Combo', 'Lineup Count', 'Exposure', *stat_sources]

    if combo_size < 1:
        return pd.DataFrame(columns=output_columns)

    excluded = set(excluded_cols)
    player_columns = [col for col in display_frame.columns if col not in excluded]

    # One set of player names per lineup; blanks and NaN slots are ignored.
    player_sets = [
        {
            str(value)
            for value in row
            if pd.notna(value) and str(value).strip() != ''
        }
        for row in display_frame[player_columns].itertuples(index=False, name=None)
    ]

    # Sorting first makes each combo a canonical tuple, so the same group of
    # players always maps to the same Counter key regardless of slot order.
    combo_counter = Counter()
    for players in player_sets:
        if len(players) >= combo_size:
            combo_counter.update(combinations(sorted(players), combo_size))

    total_lineups = len(display_frame)
    combo_stats = []
    for combo, count in combo_counter.most_common(top_n):
        combo_members = set(combo)
        # Positional boolean mask of the lineups containing every combo member.
        combo_mask = np.fromiter(
            (combo_members <= players for players in player_sets),
            dtype=bool,
            count=total_lineups,
        )
        matching = display_frame.loc[combo_mask]

        record = {
            'Combo': ' + '.join(combo),
            'Lineup Count': count,
            'Exposure': count / total_lineups,
        }
        for label, source in stat_sources.items():
            record[label] = matching[source].mean()
        combo_stats.append(record)

    return pd.DataFrame(combo_stats, columns=output_columns)


# app.py imports this function as ``analyze_player_combos``; expose that name
# so ``from global_func.analyze_player_combos import analyze_player_combos``
# resolves.
analyze_player_combos = analyze_player_combinations