Jon Solow
committed on
Commit
·
9ec1831
1
Parent(s):
83482b1
Implement stat mapping
Browse files- src/queries/nflverse/github_data.py +19 -0
- src/stats.py +153 -0
src/queries/nflverse/github_data.py
CHANGED
@@ -89,10 +89,29 @@ def get_player_kicking_stats(season_int: int | str = SEASON) -> pd.DataFrame:
|
|
89 |
return df
|
90 |
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
NFLVERSE_ASSETS = [
|
93 |
("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
|
94 |
("player_stats", f"player_stats_{SEASON}.parquet"),
|
95 |
("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
|
|
|
96 |
]
|
97 |
|
98 |
|
|
|
89 |
return df
|
90 |
|
91 |
|
92 |
+
def get_team_defense_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return defensive stat totals per (week, team) for one season.

    Sums the sack, interception, touchdown, opponent-fumble-recovery and
    safety columns of the ``player_stats_player_stats_def_<season>`` table,
    grouped by week and team.

    Args:
        season_int: Season suffix of the table to query; interpolated
            directly into the table name.

    Returns:
        A DataFrame with ``week``, ``team`` and one summed column per stat.
    """
    # NOTE(review): presumably the table must already be registered with
    # duckdb (e.g. by load_assets) before this is called -- confirm.
    query = f"""
        select
            week
            , team
            , sum(def_sacks) as def_sacks
            , sum(def_interceptions) as def_interceptions
            , sum(def_tds) as def_tds
            , sum(def_fumble_recovery_opp) as def_fumble_recovery_opp
            , sum(def_safety) as def_safety
        from player_stats_player_stats_def_{season_int}
        group by week, team
        """
    return duckdb.sql(query).df()
|
108 |
+
|
109 |
+
|
110 |
# Each entry is a pair of strings; presumably (nflverse release tag, parquet
# asset file name) -- confirm against load_assets.
NFLVERSE_ASSETS = [
    ("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
    ("player_stats", f"player_stats_{SEASON}.parquet"),
    ("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
    # Defensive player stats; presumably the source of the table queried by
    # get_team_defense_stats.
    ("player_stats", f"player_stats_def_{SEASON}.parquet"),
]
|
116 |
|
117 |
|
src/stats.py
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
from domain.playoffs import PLAYOFF_TEAM_DEF_PLAYER
|
5 |
+
from queries.nflverse.github_data import get_player_kicking_stats, get_player_stats, get_team_defense_stats
|
6 |
+
|
7 |
+
|
8 |
+
@dataclass(frozen=True)
class StatType:
    """A scoring category: a stat identifier paired with a point value.

    Instances are created once at module level and shared as constants, so
    the dataclass is frozen: an accidental ``SOME_STAT.score = ...`` raises
    ``dataclasses.FrozenInstanceError`` instead of silently changing the value
    for every consumer, and instances become hashable.
    """

    # Identifier of the stat; used as the key of the per-player stat dicts.
    key: str
    # Point value attached to the stat (presumably points per unit -- confirm).
    score: float
|
12 |
+
|
13 |
+
|
14 |
+
# Scoring table: one StatType(key, score) constant per scoring category.

# Offensive touchdowns
RUSH_TD = StatType("RUSH TD", 6.0)
REC_TD = StatType("REC TD", 6.0)
OFF_FUM_TD = StatType("OFF FUM TD", 6.0)
PASS_TD = StatType("PASS TD", 4.0)

# Two-point conversions and receptions
TWO_PT = StatType("2 PT", 2.0)
RECEPTION = StatType("REC", 1.0)

# Yardage
RUSH_YD = StatType("RUSH YD", 0.1)
REC_YD = StatType("REC YD", 0.1)
PASS_YD = StatType("PASS YD", 0.04)

# Kicking
FG_0_49 = StatType("FG 0-49", 3.0)
FG_50_ = StatType("FG 50+", 5.0)
XP = StatType("XP", 1.0)

# Turnovers charged to the offensive player
FUM_LOST = StatType("FUM LOST", -2.0)
PASS_INT = StatType("PASS INT", -2.0)

# Return, special-teams and defensive touchdowns
RET_TD = StatType("RET TD", 6.0)
ST_TD = StatType("ST TD", 6.0)
DEF_TD = StatType("DEF TD", 6.0)

# Defensive plays
DEF_INT = StatType("DEF INT", 2.0)
FUM_REC = StatType("FUM REC", 2.0)
SAFETY = StatType("SAFETY", 2.0)
SACK = StatType("SACK", 1.0)

# Points-allowed tiers
PTS_ALLOW_0 = StatType("PTS ALLOW 0", 10.0)
PTS_ALLOW_1_6 = StatType("PTS ALLOW 1-6", 7.0)
PTS_ALLOW_7_13 = StatType("PTS ALLOW 7-13", 4.0)
PTS_ALLOW_14_20 = StatType("PTS ALLOW 14-20", 1.0)
PTS_ALLOW_21_27 = StatType("PTS ALLOW 21-27", 0.0)
PTS_ALLOW_28_34 = StatType("PTS ALLOW 28-34", -1.0)
PTS_ALLOW_35_ = StatType("PTS ALLOW 35+", -4.0)

# Team result
TEAM_WIN = StatType("TEAM WIN", 5.0)
|
43 |
+
|
44 |
+
|
45 |
+
# Maps a stat column name of the nflverse data frames to the StatType key it
# contributes to. Several columns may share one key (e.g. every 2-point
# conversion column maps to TWO_PT); the add_stats_* functions sum such
# columns into a single value per player.
NFLVERSE_STAT_COL_TO_ID: dict[str, str] = {
    # Passing
    "passing_tds": PASS_TD.key,
    # Fixed: was misspelled "pasing_yards", so passing yards were never mapped.
    "passing_yards": PASS_YD.key,
    "passing_2pt_conversions": TWO_PT.key,
    "sack_fumbles_lost": FUM_LOST.key,
    "interceptions": PASS_INT.key,
    # Rushing
    "rushing_tds": RUSH_TD.key,
    "rushing_yards": RUSH_YD.key,
    "rushing_2pt_conversions": TWO_PT.key,
    "rushing_fumbles_lost": FUM_LOST.key,
    # Receiving
    "receptions": RECEPTION.key,
    "receiving_tds": REC_TD.key,
    "receiving_yards": REC_YD.key,
    "receiving_2pt_conversions": TWO_PT.key,
    "receiving_fumbles_lost": FUM_LOST.key,
    # Special teams
    "special_teams_tds": ST_TD.key,
    # Kicking: distance buckets collapse into the two field-goal categories.
    "pat_made": XP.key,
    "fg_made_0_19": FG_0_49.key,
    "fg_made_20_29": FG_0_49.key,
    "fg_made_30_39": FG_0_49.key,
    "fg_made_40_49": FG_0_49.key,
    "fg_made_50_59": FG_50_.key,
    "fg_made_60_": FG_50_.key,
    # Team defense (columns produced by get_team_defense_stats)
    "def_sacks": SACK.key,
    "def_interceptions": DEF_INT.key,
    "def_tds": DEF_TD.key,
    "def_fumble_recovery_opp": FUM_REC.key,
    "def_safety": SAFETY.key,
}
|
74 |
+
|
75 |
+
# Translates the ``week`` value found in the nflverse frames into the playoff
# round number (1-4) used as the top-level key of the stat map. Rows whose week
# is not listed here are dropped by the add_stats_* functions.
# NOTE(review): 18 -> 1 while 19 is absent; nflverse appears to number the
# wild-card round as week 19 for 18-week seasons -- confirm 18 is intentional.
NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK: dict[int, int] = {
    18: 1,
    20: 2,
    21: 3,
    22: 4,
}
|
81 |
+
|
82 |
+
|
83 |
+
def add_stats_from_player_df_to_stat_map(df: pd.DataFrame, stat_map):
    """Fold the playoff-week rows of a per-player stats frame into ``stat_map``.

    Rows whose ``week`` is not a key of NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK are
    ignored. For every remaining row, each column listed in
    NFLVERSE_STAT_COL_TO_ID is added to the stat key it maps to (columns that
    share a key are summed), and the result is stored under
    ``stat_map[playoff_week][player_id]``.

    Args:
        df: Frame with ``week`` and ``player_id`` columns plus stat columns.
        stat_map: Mapping playoff week -> player id -> stat key -> value.
            Mutated in place; it must already contain every playoff week.

    Raises:
        KeyError: If a mapped playoff week is missing from ``stat_map``.
    """
    is_playoff_week = df["week"].isin(list(NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK))
    # Take an explicit copy before rewriting ``week``: assigning to a column of
    # a filtered slice triggers SettingWithCopyWarning and depends on pandas'
    # copy-vs-view behaviour. The caller's frame is never modified.
    df_playoffs = df.loc[is_playoff_week].copy()
    df_playoffs["week"] = df_playoffs["week"].apply(lambda week: NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK[week])

    for index_key, row in df_playoffs.set_index(["week", "player_id"]).iterrows():
        if not isinstance(index_key, tuple):
            # Unreachable with a two-level index; narrows the type for mypy.
            continue
        week, player_id = index_key

        player_stats: dict[str, float] = {}
        for column, value in row.to_dict().items():
            if column not in NFLVERSE_STAT_COL_TO_ID:
                continue
            stat_key = NFLVERSE_STAT_COL_TO_ID[column]
            if stat_key in player_stats:
                player_stats[stat_key] += value
            else:
                player_stats[stat_key] = value

        week_stats = stat_map[week]
        if player_id in week_stats:
            # The player already has stats from an earlier frame: overlay the
            # new keys (same-named keys are overwritten, not summed).
            week_stats[player_id].update(player_stats)
        else:
            week_stats[player_id] = player_stats
|
104 |
+
|
105 |
+
|
106 |
+
def add_stats_from_team_def_df_to_stat_map(df: pd.DataFrame, stat_map):
    """Fold the playoff-week rows of a team-defense frame into ``stat_map``.

    Each team is translated to a player id through PLAYOFF_TEAM_DEF_PLAYER
    (keyed by the team's ``rosters_short_name``). Rows are kept only when
    their ``week`` is a key of NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK and their
    ``team`` has such a player id. For every remaining row, each column listed
    in NFLVERSE_STAT_COL_TO_ID is added to the stat key it maps to, and the
    result is stored under ``stat_map[playoff_week][player_id]``.

    Args:
        df: Frame with ``week`` and ``team`` columns plus stat columns.
        stat_map: Mapping playoff week -> player id -> stat key -> value.
            Mutated in place; it must already contain every playoff week.

    Raises:
        KeyError: If a mapped playoff week is missing from ``stat_map``.
    """
    short_team_names_to_player_id = {t.rosters_short_name: p for t, p in PLAYOFF_TEAM_DEF_PLAYER}

    keep_row = df["week"].isin(list(NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK)) & df["team"].isin(
        list(short_team_names_to_player_id)
    )
    # Take an explicit copy before rewriting ``week``: assigning to a column of
    # a filtered slice triggers SettingWithCopyWarning and depends on pandas'
    # copy-vs-view behaviour. The caller's frame is never modified.
    df_playoffs = df.loc[keep_row].copy()
    df_playoffs["week"] = df_playoffs["week"].apply(lambda week: NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK[week])

    for index_key, row in df_playoffs.set_index(["week", "team"]).iterrows():
        if not isinstance(index_key, tuple):
            # Unreachable with a two-level index; narrows the type for mypy.
            continue
        week, team = index_key
        player_id = short_team_names_to_player_id[team]

        player_stats: dict[str, float] = {}
        for column, value in row.to_dict().items():
            if column not in NFLVERSE_STAT_COL_TO_ID:
                continue
            stat_key = NFLVERSE_STAT_COL_TO_ID[column]
            if stat_key in player_stats:
                player_stats[stat_key] += value
            else:
                player_stats[stat_key] = value

        week_stats = stat_map[week]
        if player_id in week_stats:
            # An entry already exists for this id: overlay the new keys
            # (same-named keys are overwritten, not summed).
            week_stats[player_id].update(player_stats)
        else:
            week_stats[player_id] = player_stats
|
132 |
+
|
133 |
+
|
134 |
+
def assemble_nflverse_stats() -> dict[int, dict[str, dict[str, float]]]:
    """Build the playoff stat map from the nflverse player, kicking and defense data.

    Returns:
        A mapping of playoff week -> player id -> stat key -> stat value, with
        an entry for every playoff week even when no rows matched it.
    """
    stat_map: dict[int, dict[str, dict[str, float]]] = {
        playoff_week: {} for playoff_week in NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK.values()
    }

    # Fetch all three frames before folding any of them in.
    player_frames = (get_player_stats(), get_player_kicking_stats())
    team_defense_frame = get_team_defense_stats()

    for player_frame in player_frames:
        add_stats_from_player_df_to_stat_map(player_frame, stat_map)
    add_stats_from_team_def_df_to_stat_map(team_defense_frame, stat_map)

    return stat_map
|
147 |
+
|
148 |
+
|
149 |
+
if __name__ == "__main__":
    # Script entry point: imported here because only this path needs it.
    from queries.nflverse.github_data import load_assets

    # NOTE(review): presumably makes the nflverse tables available to the
    # get_* queries before they run -- confirm against load_assets.
    load_assets()
    assemble_nflverse_stats()
|