James McCool
committed on
Commit
·
68208b0
1
Parent(s):
9cec9e7
Add player combination analysis feature in app.py and implement analyze_player_combos function. This update introduces a new tab for analyzing player combinations, allowing users to select combo sizes and view statistics on the most common player pairings or triplets, enhancing the analytical capabilities of the application.
Browse files- app.py +37 -2
- global_func/analyze_player_combos.py +92 -0
app.py
CHANGED
@@ -22,6 +22,7 @@ from global_func.large_field_preset import large_field_preset
|
|
22 |
from global_func.hedging_preset import hedging_preset
|
23 |
from global_func.volatility_preset import volatility_preset
|
24 |
from global_func.reduce_volatility_preset import reduce_volatility_preset
|
|
|
25 |
|
26 |
freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
|
27 |
stacking_sports = ['MLB', 'NHL', 'NFL']
|
@@ -1240,7 +1241,7 @@ with tab2:
|
|
1240 |
use_container_width=True,
|
1241 |
hide_index=True
|
1242 |
)
|
1243 |
-
player_stats_col, stack_stats_col = st.tabs(['Player Stats', 'Stack Stats'])
|
1244 |
with player_stats_col:
|
1245 |
|
1246 |
player_stats = []
|
@@ -1401,4 +1402,38 @@ with tab2:
|
|
1401 |
use_container_width=True
|
1402 |
)
|
1403 |
else:
|
1404 |
-
stack_summary = pd.DataFrame(columns=['Stack', 'Lineup Count', 'Avg Median', 'Avg Own', 'Avg Dupes', 'Avg Finish %', 'Avg Lineup Edge'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
from global_func.hedging_preset import hedging_preset
|
23 |
from global_func.volatility_preset import volatility_preset
|
24 |
from global_func.reduce_volatility_preset import reduce_volatility_preset
|
25 |
+
from global_func.analyze_player_combos import analyze_player_combos
|
26 |
|
27 |
freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
|
28 |
stacking_sports = ['MLB', 'NHL', 'NFL']
|
|
|
1241 |
use_container_width=True,
|
1242 |
hide_index=True
|
1243 |
)
|
1244 |
+
player_stats_col, stack_stats_col, combos_col = st.tabs(['Player Stats', 'Stack Stats', 'Combos'])
|
1245 |
with player_stats_col:
|
1246 |
|
1247 |
player_stats = []
|
|
|
1402 |
use_container_width=True
|
1403 |
)
|
1404 |
else:
|
1405 |
+
stack_summary = pd.DataFrame(columns=['Stack', 'Lineup Count', 'Avg Median', 'Avg Own', 'Avg Dupes', 'Avg Finish %', 'Avg Lineup Edge'])
|
1406 |
+
|
1407 |
+
with combos_col:
    st.subheader("Player Combinations")

    # Controls: combo size selector on the left, trigger button on the right.
    col1, col2 = st.columns(2)
    with col1:
        combo_size = st.selectbox("Combo Size", [2, 3], key='combo_size')
    with col2:
        # The analysis only runs on click; the result is stored in
        # session_state under 'combo_analysis' and read back below.
        if st.button("Analyze Combos", key='analyze_combos'):
            st.session_state['combo_analysis'] = analyze_player_combos(
                display_frame, excluded_cols, combo_size
            )

    # Display results
    if 'combo_analysis' not in st.session_state:
        st.info("Click 'Analyze Combos' to see the most common player combinations.")
    elif st.session_state['combo_analysis'].empty:
        # Guard: styling below references columns by name, which an empty
        # result may not carry.
        st.info("No player combinations were found for the current lineups.")
    else:
        st.dataframe(
            st.session_state['combo_analysis'].style
            # A preceding default-cmap gradient over the same cells was
            # removed: this call overrode it completely.
            .background_gradient(cmap='RdYlGn')
            # Reversed colormap for these three columns
            # (presumably lower values are preferable - TODO confirm).
            .background_gradient(cmap='RdYlGn_r', subset=['Avg Finish %', 'Avg Own', 'Avg Dupes'])
            .format({
                'Avg Median': '{:.2f}',
                'Avg Own': '{:.2f}',
                'Avg Dupes': '{:.2f}',
                'Avg Finish %': '{:.2%}',
                'Avg Lineup Edge': '{:.2%}',
                'Exposure': '{:.2%}'
            }),
            height=400,
            use_container_width=True
        )
|
global_func/analyze_player_combos.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
from collections import Counter
|
4 |
+
|
5 |
+
def analyze_player_combinations(display_frame, excluded_cols, combo_size=2, top_n=50):
    """
    Analyze the most common player combinations in a DataFrame of lineups.

    Every column of ``display_frame`` that is not listed in ``excluded_cols``
    is treated as a player slot. For each row the distinct, non-blank player
    values are collected, every ``combo_size``-sized combination of them is
    counted, and summary statistics are computed for the most frequent ones.

    Args:
        display_frame: DataFrame containing lineup data. Must also contain the
            columns 'median', 'Own', 'Dupes', 'Finish_percentile' and
            'Lineup Edge' whenever at least one combination is found.
        excluded_cols: List of columns to exclude from analysis
        combo_size: Number of players per combination (e.g. 2 or 3).
            Values below 1 produce an empty result.
        top_n: Maximum number of combinations to report, most frequent
            first. ``None`` reports all of them.

    Returns:
        DataFrame with one row per combination and the columns 'Combo',
        'Lineup Count', 'Exposure', 'Avg Median', 'Avg Own', 'Avg Dupes',
        'Avg Finish %' and 'Avg Lineup Edge'. The columns are present even
        when no combination was found, so callers can style by column name.
    """
    # Local import keeps this block self-contained without touching the
    # module-level import section.
    from itertools import combinations

    result_columns = [
        'Combo', 'Lineup Count', 'Exposure', 'Avg Median', 'Avg Own',
        'Avg Dupes', 'Avg Finish %', 'Avg Lineup Edge',
    ]
    # Output column -> source column averaged over the matching lineups.
    stat_columns = {
        'Avg Median': 'median',
        'Avg Own': 'Own',
        'Avg Dupes': 'Dupes',
        'Avg Finish %': 'Finish_percentile',
        'Avg Lineup Edge': 'Lineup Edge',
    }

    # Get player columns
    player_columns = [col for col in display_frame.columns if col not in excluded_cols]

    # One set of distinct players per lineup; a set collapses a player that
    # appears in more than one slot of the same row.
    player_sets = [
        frozenset(
            str(player)
            for player in row
            if pd.notna(player) and str(player).strip() != ''
        )
        for row in display_frame[player_columns].itertuples(index=False, name=None)
    ]

    # Count combinations. Sorting first makes (A, B) and (B, A) the same key;
    # combinations() yields nothing when a lineup has fewer than combo_size
    # players, so no explicit length check is needed.
    combo_counter = Counter()
    if combo_size >= 1:
        for player_set in player_sets:
            combo_counter.update(combinations(sorted(player_set), combo_size))

    total_lineups = len(display_frame)
    combo_stats = []

    for combo, count in combo_counter.most_common(top_n):
        # Boolean row mask of the lineups that contain every player in combo.
        members = set(combo)
        combo_mask = np.fromiter(
            (members <= player_set for player_set in player_sets),
            dtype=bool,
            count=len(player_sets),
        )

        stats_row = {
            'Combo': ' + '.join(combo),
            'Lineup Count': count,
            # total_lineups is non-zero here: a counted combo implies a row.
            'Exposure': count / total_lineups,
        }
        for label, source in stat_columns.items():
            stats_row[label] = display_frame.loc[combo_mask, source].mean()
        combo_stats.append(stats_row)

    return pd.DataFrame(combo_stats, columns=result_columns)


# app.py imports this function under the shorter name; keep both importable.
analyze_player_combos = analyze_player_combinations
|