Jon Solow committed on
Commit
9ec1831
·
1 Parent(s): 83482b1

Implement stat mapping

Browse files
Files changed (2) hide show
  1. src/queries/nflverse/github_data.py +19 -0
  2. src/stats.py +153 -0
src/queries/nflverse/github_data.py CHANGED
@@ -89,10 +89,29 @@ def get_player_kicking_stats(season_int: int | str = SEASON) -> pd.DataFrame:
89
  return df
90
 
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  NFLVERSE_ASSETS = [
93
  ("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
94
  ("player_stats", f"player_stats_{SEASON}.parquet"),
95
  ("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
 
96
  ]
97
 
98
 
 
89
  return df
90
 
91
 
92
def get_team_defense_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return per-week, per-team totals of the defensive stat columns.

    Sums ``def_sacks``, ``def_interceptions``, ``def_tds``,
    ``def_fumble_recovery_opp`` and ``def_safety`` over every row of the
    ``player_stats_player_stats_def_<season>`` table, grouped by ``week`` and
    ``team``.

    Args:
        season_int: Season suffix of the table to read; interpolated verbatim
            into the table name.

    Returns:
        A DataFrame with one row per (week, team) and one column per summed stat.
    """
    # presumably the table is registered in duckdb by the asset loader before
    # this is called -- verify against load_assets
    team_defense_query = f"""
        select
            week
            , team
            , sum(def_sacks) as def_sacks
            , sum(def_interceptions) as def_interceptions
            , sum(def_tds) as def_tds
            , sum(def_fumble_recovery_opp) as def_fumble_recovery_opp
            , sum(def_safety) as def_safety
        from player_stats_player_stats_def_{season_int}
        group by week, team
        """
    return duckdb.sql(team_defense_query).df()
108
+
109
+
110
  NFLVERSE_ASSETS = [
111
  ("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
112
  ("player_stats", f"player_stats_{SEASON}.parquet"),
113
  ("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
114
+ ("player_stats", f"player_stats_def_{SEASON}.parquet"),
115
  ]
116
 
117
 
src/stats.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ import pandas as pd
3
+
4
+ from domain.playoffs import PLAYOFF_TEAM_DEF_PLAYER
5
+ from queries.nflverse.github_data import get_player_kicking_stats, get_player_stats, get_team_defense_stats
6
+
7
+
8
@dataclass
class StatType:
    """A named stat category paired with its numeric score."""

    # Display/lookup identifier of the stat, e.g. "RUSH TD".
    key: str
    # Numeric score attached to the stat (presumably points per unit -- confirm
    # against the scoring code that consumes it).
    score: float
12
+
13
+
14
# --- Offense -----------------------------------------------------------------
RUSH_TD = StatType(key="RUSH TD", score=6.0)
REC_TD = StatType(key="REC TD", score=6.0)
OFF_FUM_TD = StatType(key="OFF FUM TD", score=6.0)
PASS_TD = StatType(key="PASS TD", score=4.0)
FG_0_49 = StatType(key="FG 0-49", score=3.0)
FG_50_ = StatType(key="FG 50+", score=5.0)
TWO_PT = StatType(key="2 PT", score=2.0)
RECEPTION = StatType(key="REC", score=1.0)
RUSH_YD = StatType(key="RUSH YD", score=0.1)
REC_YD = StatType(key="REC YD", score=0.1)
PASS_YD = StatType(key="PASS YD", score=0.04)
XP = StatType(key="XP", score=1.0)
FUM_LOST = StatType(key="FUM LOST", score=-2.0)
PASS_INT = StatType(key="PASS INT", score=-2.0)
RET_TD = StatType(key="RET TD", score=6.0)

# --- Defense -----------------------------------------------------------------
DEF_TD = StatType(key="DEF TD", score=6.0)
DEF_INT = StatType(key="DEF INT", score=2.0)
FUM_REC = StatType(key="FUM REC", score=2.0)
SAFETY = StatType(key="SAFETY", score=2.0)
SACK = StatType(key="SACK", score=1.0)

# --- Points-allowed tiers (one StatType per range) ----------------------------
PTS_ALLOW_0 = StatType(key="PTS ALLOW 0", score=10.0)
PTS_ALLOW_1_6 = StatType(key="PTS ALLOW 1-6", score=7.0)
PTS_ALLOW_7_13 = StatType(key="PTS ALLOW 7-13", score=4.0)
PTS_ALLOW_14_20 = StatType(key="PTS ALLOW 14-20", score=1.0)
PTS_ALLOW_21_27 = StatType(key="PTS ALLOW 21-27", score=0.0)
PTS_ALLOW_28_34 = StatType(key="PTS ALLOW 28-34", score=-1.0)
PTS_ALLOW_35_ = StatType(key="PTS ALLOW 35+", score=-4.0)

# --- Team result / special teams ----------------------------------------------
TEAM_WIN = StatType(key="TEAM WIN", score=5.0)
ST_TD = StatType(key="ST TD", score=6.0)
43
+
44
+
45
# Maps an nflverse stat column name to the StatType key it counts toward.
# Several columns deliberately share one key (all *_2pt_conversions -> TWO_PT,
# all *_fumbles_lost -> FUM_LOST, the field-goal distance buckets -> FG_0_49 or
# FG_50_); columns not listed here are ignored by the stat-map builders.
NFLVERSE_STAT_COL_TO_ID: dict[str, str] = {
    # passing
    "passing_tds": PASS_TD.key,
    # Fixed: this key was misspelled "pasing_yards", so the passing_yards
    # column never matched and passing yards were silently dropped.
    "passing_yards": PASS_YD.key,
    "passing_2pt_conversions": TWO_PT.key,
    "sack_fumbles_lost": FUM_LOST.key,
    "interceptions": PASS_INT.key,
    # rushing
    "rushing_tds": RUSH_TD.key,
    "rushing_yards": RUSH_YD.key,
    "rushing_2pt_conversions": TWO_PT.key,
    "rushing_fumbles_lost": FUM_LOST.key,
    # receiving
    "receptions": RECEPTION.key,
    "receiving_tds": REC_TD.key,
    "receiving_yards": REC_YD.key,
    "receiving_2pt_conversions": TWO_PT.key,
    "receiving_fumbles_lost": FUM_LOST.key,
    # special teams
    "special_teams_tds": ST_TD.key,
    # kicking
    "pat_made": XP.key,
    "fg_made_0_19": FG_0_49.key,
    "fg_made_20_29": FG_0_49.key,
    "fg_made_30_39": FG_0_49.key,
    "fg_made_40_49": FG_0_49.key,
    "fg_made_50_59": FG_50_.key,
    "fg_made_60_": FG_50_.key,
    # team defense
    "def_sacks": SACK.key,
    "def_interceptions": DEF_INT.key,
    "def_tds": DEF_TD.key,
    "def_fumble_recovery_opp": FUM_REC.key,
    "def_safety": SAFETY.key,
}
74
+
75
# Maps the nflverse `week` value to the playoff week number (1-4) used as the
# top-level key of the stat map. Rows whose week is not a key here are dropped.
# NOTE(review): 18 maps to playoff week 1 while 19 is absent; nflverse appears
# to number the first playoff round of an 18-week season as week 19 -- confirm
# whether 18 is a deliberate placeholder.
NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK = {
    18: 1,
    20: 2,
    21: 3,
    22: 4,
}
81
+
82
+
83
def add_stats_from_player_df_to_stat_map(df: pd.DataFrame, stat_map):
    """Merge per-player stats from *df* into *stat_map*, in place.

    Rows whose ``week`` is not a key of ``NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK``
    are ignored; the remaining rows have ``week`` renumbered through that
    mapping. For each (week, player_id) row, every column listed in
    ``NFLVERSE_STAT_COL_TO_ID`` is added to the total of its mapped stat key,
    and the resulting dict is merged into ``stat_map[week][player_id]``.

    Args:
        df: Frame with at least ``week`` and ``player_id`` columns plus any
            subset of the mapped stat columns.
        stat_map: ``week -> player_id -> stat key -> value``; must already
            contain an entry for every playoff week found in *df*.

    Raises:
        KeyError: If *stat_map* has no entry for a playoff week present in *df*.
    """
    in_playoffs = df.week.isin(list(NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK))
    playoff_rows = df[in_playoffs]
    # assign() builds a new frame; writing to a column of the filtered slice
    # (the previous approach) triggers SettingWithCopyWarning.
    playoff_rows = playoff_rows.assign(week=playoff_rows.week.map(NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK))

    for index_key, row in playoff_rows.set_index(["week", "player_id"]).iterrows():
        if not isinstance(index_key, tuple):
            # this won't happen but makes mypy happy
            continue
        week, player_id = index_key

        player_stats: dict[str, float] = {}
        for column, value in row.to_dict().items():
            mapped_key = NFLVERSE_STAT_COL_TO_ID.get(column)
            if mapped_key is None:
                continue
            # A missing value contributes nothing; adding NaN would turn the
            # whole total for this stat key into NaN.
            amount = 0.0 if pd.isna(value) else value
            player_stats[mapped_key] = player_stats.get(mapped_key, 0.0) + amount

        # Keys already stored for this player are overwritten, not summed.
        stat_map[week].setdefault(player_id, {}).update(player_stats)
104
+
105
+
106
def add_stats_from_team_def_df_to_stat_map(df: pd.DataFrame, stat_map):
    """Merge per-team defensive stats from *df* into *stat_map*, in place.

    Each team is stored under the player id paired with it in
    ``PLAYOFF_TEAM_DEF_PLAYER`` (matched on the team's ``rosters_short_name``).
    Rows are kept only when ``week`` is a key of
    ``NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK`` and ``team`` is one of those short
    names; ``week`` is then renumbered through the mapping. For each
    (week, team) row, every column listed in ``NFLVERSE_STAT_COL_TO_ID`` is
    added to the total of its mapped stat key, and the result is merged into
    ``stat_map[week][player_id]``.

    Args:
        df: Frame with at least ``week`` and ``team`` columns plus any subset of
            the mapped stat columns.
        stat_map: ``week -> player_id -> stat key -> value``; must already
            contain an entry for every playoff week found in *df*.

    Raises:
        KeyError: If *stat_map* has no entry for a playoff week present in *df*.
    """
    short_team_names_to_player_id = {t.rosters_short_name: p for t, p in PLAYOFF_TEAM_DEF_PLAYER}

    in_playoffs = df.week.isin(list(NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK))
    is_playoff_team = df.team.isin(list(short_team_names_to_player_id))
    playoff_rows = df[in_playoffs & is_playoff_team]
    # assign() builds a new frame; writing to a column of the filtered slice
    # (the previous approach) triggers SettingWithCopyWarning.
    playoff_rows = playoff_rows.assign(week=playoff_rows.week.map(NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK))

    for index_key, row in playoff_rows.set_index(["week", "team"]).iterrows():
        if not isinstance(index_key, tuple):
            # this won't happen but makes mypy happy
            continue
        week, team = index_key
        player_id = short_team_names_to_player_id[team]

        player_stats: dict[str, float] = {}
        for column, value in row.to_dict().items():
            mapped_key = NFLVERSE_STAT_COL_TO_ID.get(column)
            if mapped_key is None:
                continue
            # A missing value (e.g. a SQL sum over only NULLs) contributes
            # nothing; adding NaN would turn the whole total into NaN.
            amount = 0.0 if pd.isna(value) else value
            player_stats[mapped_key] = player_stats.get(mapped_key, 0.0) + amount

        # Keys already stored for this player id are overwritten, not summed.
        stat_map[week].setdefault(player_id, {}).update(player_stats)
132
+
133
+
134
def assemble_nflverse_stats() -> dict[int, dict[str, dict[str, float]]]:
    """Build the full stat map from the player, kicking and team-defense frames.

    Returns:
        A nested mapping ``playoff week -> player_id -> stat key -> value`` with
        one (possibly empty) entry per value of
        ``NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK``.
    """
    # Seed every playoff week up front so the loaders can index stat_map[week].
    stat_map: dict[int, dict[str, dict[str, float]]] = {}
    for playoff_week in NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK.values():
        stat_map[playoff_week] = {}

    # Fetch all three frames before merging any of them.
    offense_frame = get_player_stats()
    kicking_frame = get_player_kicking_stats()
    defense_frame = get_team_defense_stats()

    # Player-keyed frames share one loader; team defense is keyed by team.
    for player_frame in (offense_frame, kicking_frame):
        add_stats_from_player_df_to_stat_map(player_frame, stat_map)
    add_stats_from_team_def_df_to_stat_map(defense_frame, stat_map)

    return stat_map
147
+
148
+
149
if __name__ == "__main__":
    # Manual run: load the nflverse assets, then build the stat map once.
    # The result is discarded; this only checks that assembly completes.
    from queries.nflverse.github_data import load_assets

    load_assets()
    assemble_nflverse_stats()