Jon Solow
Revert "Revert "Replace the hack id map for yahoo player ids to gsis_id""
73408d5
raw
history blame
13.9 kB
from dataclasses import dataclass
import json
import pandas as pd
import requests
import streamlit as st
from domain.playoffs import PLAYOFF_TEAM_DEF_PLAYER
from login import get_stat_overrides
from queries.nflverse.github_data import get_player_kicking_stats, get_player_stats, get_team_defense_stats
# Lifetime, in seconds, of the Streamlit cache entries for get_stats_map and
# get_scores_map (two minutes).
STAT_CACHE_SECONDS = 60 * 2
@dataclass
class StatType:
    """A scoring category: a string key and the points credited per unit.

    Constructing an instance registers it in the module-level ``STAT_KEY_MAP``
    under its ``key``, so a stat key can later be resolved back to its
    ``StatType`` (and therefore to its ``score``).
    """

    # Identifier used as the stat key in the stat maps.
    key: str
    # Points per unit of the stat; multiplied by the stat value when scoring.
    score: float

    def __post_init__(self) -> None:
        # A later instance with the same key replaces the earlier registration.
        STAT_KEY_MAP[self.key] = self
# Registry of every StatType, keyed by StatType.key. It is filled in by
# StatType.__post_init__, so it must be defined before the first constant
# below is constructed.
STAT_KEY_MAP: dict[str, StatType] = {}
# Scoring table. ``score`` is the number of points credited per unit of the
# stat: get_scores_map multiplies it by the recorded stat value.
RUSH_TD = StatType(key="RUSH TD", score=6.0)
REC_TD = StatType(key="REC TD", score=6.0)
OFF_FUM_TD = StatType(key="OFF FUM TD", score=6.0)
PASS_TD = StatType(key="PASS TD", score=4.0)
FG_0_49 = StatType(key="FG 0-49", score=3.0)
FG_50_ = StatType(key="FG 50+", score=5.0)
TWO_PT = StatType(key="2 PT", score=2.0)
RECEPTION = StatType(key="REC", score=1.0)
RUSH_YD = StatType(key="RUSH YD", score=0.1)
REC_YD = StatType(key="REC YD", score=0.1)
PASS_YD = StatType(key="PASS YD", score=0.04)
XP = StatType(key="XP", score=1.0)
FUM_LOST = StatType(key="FUM LOST", score=-2.0)
PASS_INT = StatType(key="PASS INT", score=-2.0)
RET_TD = StatType(key="RET TD", score=6.0)
DEF_TD = StatType(key="DEF TD", score=6.0)
DEF_INT = StatType(key="DEF INT", score=2.0)
FUM_REC = StatType(key="FUM REC", score=2.0)
SAFETY = StatType(key="SAFETY", score=2.0)
SACK = StatType(key="SACK", score=1.0)
# NOTE(review): the PTS_ALLOW_* buckets are presumably points-allowed ranges
# for team defenses; nothing in this file maps a data column onto them.
PTS_ALLOW_0 = StatType(key="PTS 0", score=10.0)
PTS_ALLOW_1_6 = StatType(key="PTS 1-6", score=7.0)
PTS_ALLOW_7_13 = StatType(key="PTS 7-13", score=4.0)
PTS_ALLOW_14_20 = StatType(key="PTS 14-20", score=1.0)
PTS_ALLOW_21_27 = StatType(key="PTS 21-27", score=0.0)
PTS_ALLOW_28_34 = StatType(key="PTS 28-34", score=-1.0)
PTS_ALLOW_35_ = StatType(key="PTS 35+", score=-4.0)
TEAM_WIN = StatType(key="TEAM WIN", score=5.0)
ST_TD = StatType(key="ST TD", score=6.0)
# Translates nflverse stat column names into StatType keys. Columns that are
# not listed here are ignored by the nflverse loaders. Several columns share
# one key (the 2-point conversion, fumbles-lost and field-goal columns); the
# loaders add such columns together under the shared key.
NFLVERSE_STAT_COL_TO_ID: dict[str, str] = {
"passing_tds": PASS_TD.key,
"passing_yards": PASS_YD.key,
"passing_2pt_conversions": TWO_PT.key,
"sack_fumbles_lost": FUM_LOST.key,
"interceptions": PASS_INT.key,
"rushing_tds": RUSH_TD.key,
"rushing_yards": RUSH_YD.key,
"rushing_2pt_conversions": TWO_PT.key,
"rushing_fumbles_lost": FUM_LOST.key,
"receptions": RECEPTION.key,
"receiving_tds": REC_TD.key,
"receiving_yards": REC_YD.key,
"receiving_2pt_conversions": TWO_PT.key,
"receiving_fumbles_lost": FUM_LOST.key,
"special_teams_tds": ST_TD.key,
"pat_made": XP.key,
"fg_made_0_19": FG_0_49.key,
"fg_made_20_29": FG_0_49.key,
"fg_made_30_39": FG_0_49.key,
"fg_made_40_49": FG_0_49.key,
"fg_made_50_59": FG_50_.key,
"fg_made_60_": FG_50_.key,
"def_sacks": SACK.key,
"def_interceptions": DEF_INT.key,
"def_tds": DEF_TD.key,
"def_fumble_recovery_opp": FUM_REC.key,
"def_safety": SAFETY.key,
}
# nflverse week number -> playoff week number (1-4). The nflverse loaders drop
# every row whose week is not a key of this mapping.
NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK = {
19: 1,
20: 2,
21: 3,
22: 4,
}
def add_stats_from_player_df_to_stat_map(df: pd.DataFrame, stat_map):
    """Fold per-player playoff stats from an nflverse frame into ``stat_map``.

    Only rows whose ``week`` is a key of ``NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK``
    are used, and the week is translated to its playoff week. Columns listed
    in ``NFLVERSE_STAT_COL_TO_ID`` are renamed to their stat keys; columns that
    share a stat key are summed. All other columns are ignored.

    Args:
        df: Frame with ``week`` and ``player_id`` columns plus stat columns.
            The frame itself is not modified.
        stat_map: Mapping of playoff week -> player id -> stat key -> value,
            updated in place. When the player already has an entry for the
            week, the new stats are merged with ``dict.update`` (values for an
            existing key are replaced, not added).
    """
    # Take an explicit copy: assigning a column on a boolean-filtered slice
    # triggers pandas' SettingWithCopyWarning.
    df_playoffs = df[df.week.isin(NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK.keys())].copy()
    df_playoffs["week"] = df_playoffs["week"].apply(lambda x: NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK[x])
    for week_player_id_tuple, row in df_playoffs.set_index(["week", "player_id"]).iterrows():
        if not isinstance(week_player_id_tuple, tuple):
            # this won't happen but makes mypy happy
            continue
        week, player_id = week_player_id_tuple
        player_stats: dict[str, float] = {}
        for column, value in row.to_dict().items():
            mapped_key = NFLVERSE_STAT_COL_TO_ID.get(column)
            # Skip unmapped columns, and missing values: adding NaN would turn
            # the whole summed stat (and later the player's score) into NaN.
            if mapped_key is None or pd.isna(value):
                continue
            player_stats[mapped_key] = player_stats.get(mapped_key, 0) + value
        # Create the week/player entries on demand instead of assuming the
        # caller pre-seeded them.
        stat_map.setdefault(week, {}).setdefault(player_id, {}).update(player_stats)
def add_stats_from_team_def_df_to_stat_map(df: pd.DataFrame, stat_map):
    """Fold team-defense playoff stats from an nflverse frame into ``stat_map``.

    Only rows whose ``week`` is a key of ``NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK``
    and whose ``team`` is the ``rosters_short_name`` of a team in
    ``PLAYOFF_TEAM_DEF_PLAYER`` are used. Each row is stored under the player
    id paired with that team. Columns listed in ``NFLVERSE_STAT_COL_TO_ID`` are
    renamed to their stat keys; columns that share a stat key are summed.

    Args:
        df: Frame with ``week`` and ``team`` columns plus stat columns. The
            frame itself is not modified.
        stat_map: Mapping of playoff week -> player id -> stat key -> value,
            updated in place. When the id already has an entry for the week,
            the new stats are merged with ``dict.update`` (values for an
            existing key are replaced, not added).
    """
    short_team_names_to_player_id = {t.rosters_short_name: p for t, p in PLAYOFF_TEAM_DEF_PLAYER}
    # Take an explicit copy: assigning a column on a boolean-filtered slice
    # triggers pandas' SettingWithCopyWarning.
    df_playoffs = df[
        (df.week.isin(NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK.keys()) & df.team.isin(short_team_names_to_player_id.keys()))
    ].copy()
    df_playoffs["week"] = df_playoffs["week"].apply(lambda x: NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK[x])
    for week_team_tuple, row in df_playoffs.set_index(["week", "team"]).iterrows():
        if not isinstance(week_team_tuple, tuple):
            # this won't happen but makes mypy happy
            continue
        week, team = week_team_tuple
        player_id = short_team_names_to_player_id[team]
        player_stats: dict[str, float] = {}
        for column, value in row.to_dict().items():
            mapped_key = NFLVERSE_STAT_COL_TO_ID.get(column)
            # Skip unmapped columns, and missing values: adding NaN would turn
            # the whole summed stat (and later the score) into NaN.
            if mapped_key is None or pd.isna(value):
                continue
            player_stats[mapped_key] = player_stats.get(mapped_key, 0) + value
        # Create the week/player entries on demand instead of assuming the
        # caller pre-seeded them.
        stat_map.setdefault(week, {}).setdefault(player_id, {}).update(player_stats)
def add_st_stats_to_defense(df: pd.DataFrame, stat_map):
    """Credit special-teams touchdowns from per-player rows to team defenses.

    Only rows whose ``week`` is a key of ``NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK``
    and whose ``team`` is the ``rosters_short_name`` of a team in
    ``PLAYOFF_TEAM_DEF_PLAYER`` are used. For each such row, the value of the
    ``special_teams_tds`` column is added to that stat on the entry of the
    player id paired with the row's team. No other column is read.

    Args:
        df: Frame with ``week`` and ``team`` columns and, normally, a
            ``special_teams_tds`` column. The frame itself is not modified.
        stat_map: Mapping of playoff week -> player id -> stat key -> value,
            updated in place. An entry is created for the team's player id
            even if it had none before.
    """
    short_team_names_to_player_id = {t.rosters_short_name: p for t, p in PLAYOFF_TEAM_DEF_PLAYER}
    # Take an explicit copy: assigning a column on a boolean-filtered slice
    # triggers pandas' SettingWithCopyWarning.
    df_playoffs = df[
        (df.week.isin(NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK.keys()) & df.team.isin(short_team_names_to_player_id.keys()))
    ].copy()
    df_playoffs["week"] = df_playoffs["week"].apply(lambda x: NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK[x])
    st_column = "special_teams_tds"
    st_stat_key = NFLVERSE_STAT_COL_TO_ID[st_column]
    for week_team_tuple, row in df_playoffs.set_index(["week", "team"]).iterrows():
        if not isinstance(week_team_tuple, tuple):
            # this won't happen but makes mypy happy
            continue
        week, team = week_team_tuple
        player_id = short_team_names_to_player_id[team]
        week_stats = stat_map.setdefault(week, {})
        player_stats: dict[str, float] = week_stats.get(player_id, {})
        # special teams td update only
        row_values = row.to_dict()
        if st_column in row_values and not pd.isna(row_values[st_column]):
            # A missing value is skipped so it cannot turn the total into NaN.
            player_stats[st_stat_key] = player_stats.get(st_stat_key, 0) + row_values[st_column]
        week_stats[player_id] = player_stats
# 24 hour cache
@st.cache_data(ttl=60 * 60 * 24)
def assemble_nflverse_stats() -> dict[int, dict[str, dict[str, float]]]:
    """Build the playoff stat map from the nflverse data sets.

    Returns:
        Mapping of playoff week -> player_id -> stat_key -> stat value, with
        an entry for every playoff week in
        ``NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK`` (possibly empty).
    """
    nflverse_stats: dict[int, dict[str, dict[str, float]]] = {}
    for playoff_week in NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK.values():
        nflverse_stats[playoff_week] = {}

    player_frame = get_player_stats()
    kicking_frame = get_player_kicking_stats()
    defense_frame = get_team_defense_stats()

    # Offensive and kicking stats share the per-player loader.
    for frame in (player_frame, kicking_frame):
        add_stats_from_player_df_to_stat_map(frame, nflverse_stats)
    add_stats_from_team_def_df_to_stat_map(defense_frame, nflverse_stats)
    # Runs last so special-teams touchdowns are added onto the defense entries
    # created by the previous step.
    add_st_stats_to_defense(player_frame, nflverse_stats)
    return nflverse_stats
def get_live_stats() -> dict[int, dict[str, dict[str, float]]]:
    """Return the live stat map (week -> player id -> stat key -> value).

    Live stats currently come from Yahoo via ``get_yahoo_stats``.
    """
    live_stats = get_yahoo_stats()
    return live_stats
# Per Yahoo stat table (outer key), maps Yahoo ``statId`` values to StatType
# keys. add_yahoo_stat_type_to_stat_map copies only the statIds listed for the
# table it is processing; every other statId is ignored.
YAHOO_TO_STAT_MAP: dict[str, dict[str, str]] = {
"PASSING": {
"PASSING_YARDS": PASS_YD.key,
"PASSING_TOUCHDOWNS": PASS_TD.key,
"PASSING_INTERCEPTIONS": PASS_INT.key,
# NOTE(review): FUMBLES_LOST is mapped in the PASSING, RUSHING and RECEIVING
# tables and the values are added together per player. Confirm Yahoo reports a
# per-table count here rather than the player's overall total; otherwise a
# lost fumble is counted more than once.
"FUMBLES_LOST": FUM_LOST.key,
},
"RUSHING": {
"RUSHING_TOUCHDOWNS": RUSH_TD.key,
"FUMBLES_LOST": FUM_LOST.key,
"RUSHING_YARDS": RUSH_YD.key,
},
"RECEIVING": {
"RECEPTIONS": RECEPTION.key,
"RECEIVING_YARDS": REC_YD.key,
"RECEIVING_TOUCHDOWNS": REC_TD.key,
"FUMBLES_LOST": FUM_LOST.key,
},
"KICKING": {
"FIELD_GOALS_MADE_0_19": FG_0_49.key,
"FIELD_GOALS_MADE_20_29": FG_0_49.key,
"FIELD_GOALS_MADE_30_39": FG_0_49.key,
"FIELD_GOALS_MADE_40_49": FG_0_49.key,
"FIELD_GOALS_MADE_50_PLUS": FG_50_.key,
"EXTRA_POINTS_MADE": XP.key,
},
"DEFENSE": {
"SACKS": SACK.key,
"INTERCEPTIONS_FORCED": DEF_INT.key,
# Both defensive return-touchdown statIds are scored under the same DEF_TD key.
"INTERCEPTION_RETURN_TOUCHDOWNS": DEF_TD.key,
# NOTE(review): a *forced* fumble is scored with the fumble-recovery key
# (FUM_REC) — confirm this substitution is intentional.
"FORCED_FUMBLES": FUM_REC.key,
"FUMBLE_RETURN_TOUCHDOWNS": DEF_TD.key,
"SAFETIES": SAFETY.key,
},
}
# cache id map for 24 hours
@st.cache_data(ttl=60 * 60 * 24)
def get_yahoo_id_map() -> dict[str, str]:
    """Return a mapping from Yahoo player id to gsis_id.

    The ids are read from the dynastyprocess ``db_playerids.csv`` file on
    GitHub. Rows missing either id are ignored.

    Returns:
        Dict keyed by the Yahoo id rendered as an integer string (for example
        ``"12345"``, not ``"12345.0"``), with the matching ``gsis_id`` as the
        value.
    """
    df = pd.read_csv(
        r"https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv",
        # Only these two columns are used; skip parsing the rest of the file.
        usecols=["yahoo_id", "gsis_id"],
    )
    has_both_ids = df["yahoo_id"].notna() & df["gsis_id"].notna()
    df = df[has_both_ids]
    # Build the keys as a separate Series rather than assigning a column on
    # the filtered frame, which triggers pandas' SettingWithCopyWarning.
    yahoo_ids = df["yahoo_id"].astype(int).astype(str)
    return dict(zip(yahoo_ids, df["gsis_id"]))
# Yahoo's week numbers happen to be the same as nflverse's, so the nflverse
# week -> playoff week mapping is reused (an alias of the same dict, not a copy).
YAHOO_WEEK_MAP = NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK
def add_yahoo_stat_type_to_stat_map(
    stats_object, yahoo_stat_type: str, stat_map: dict[int, dict[str, dict[str, float]]]
):
    """Add one Yahoo stat table's weekly leaders to ``stat_map``.

    Args:
        stats_object: Mapping of raw Yahoo week -> week payload. The leaders
            list is read from
            ``payload["POSTSEASON"][""][<stat>]["leagues"][0]["leagueWeeks"][0]["leaders"]``,
            where ``<stat>`` is ``FIELD_GOALS_MADE`` for KICKING,
            ``TOTAL_TACKLES`` for DEFENSE and ``<yahoo_stat_type>_YARDS``
            otherwise.
        yahoo_stat_type: A key of ``YAHOO_TO_STAT_MAP``.
        stat_map: Mapping of playoff week -> player id -> stat key -> value,
            updated in place. Values for a stat key the player already has
            are added to, not replaced.

    For DEFENSE the player id is the one paired with the leader's team
    abbreviation in ``PLAYOFF_TEAM_DEF_PLAYER``. For every other table the
    Yahoo player id is translated through ``get_yahoo_id_map`` and kept as-is
    when no translation exists. Stats whose ``statId`` is not mapped for this
    table are ignored, and an empty stat value counts as 0.

    Raises:
        KeyError: If a raw week is not in ``YAHOO_WEEK_MAP``, the payload does
            not have the expected shape, or a defense team abbreviation is
            unknown.
    """
    assert yahoo_stat_type in YAHOO_TO_STAT_MAP
    stat_id_to_key = YAHOO_TO_STAT_MAP[yahoo_stat_type]
    is_defense = yahoo_stat_type == "DEFENSE"

    if yahoo_stat_type == "KICKING":
        leader_stat = "FIELD_GOALS_MADE"
    elif is_defense:
        leader_stat = "TOTAL_TACKLES"
    else:
        leader_stat = f"{yahoo_stat_type}_YARDS"

    # only used for defense summary; built once rather than once per week
    short_team_names_to_player_id = {}
    if is_defense:
        short_team_names_to_player_id = {t.rosters_short_name: p for t, p in PLAYOFF_TEAM_DEF_PLAYER}

    # Fetched on first use and then reused: get_yahoo_id_map is a cached
    # Streamlit function, and calling it once per player repeats the cache
    # lookup (and the copy it hands back) for every row.
    yahoo_id_map = None

    for raw_week, week_dict in stats_object.items():
        week = YAHOO_WEEK_MAP[int(raw_week)]
        week_stats = stat_map.setdefault(week, {})
        week_leaders = week_dict["POSTSEASON"][""][leader_stat]["leagues"][0]["leagueWeeks"][0]["leaders"]
        for player in week_leaders:
            if is_defense:
                player_id = short_team_names_to_player_id[player["player"]["team"]["abbreviation"]]
            else:
                if yahoo_id_map is None:
                    yahoo_id_map = get_yahoo_id_map()
                raw_player_id = player["player"]["playerId"].split(".")[-1]
                player_id = yahoo_id_map.get(raw_player_id, raw_player_id)
            player_stats = week_stats.setdefault(player_id, {})
            for stat in player["stats"]:
                stat_key = stat_id_to_key.get(stat["statId"])
                if not stat_key:
                    continue
                player_stats[stat_key] = player_stats.get(stat_key, 0.0) + float(stat["value"] or 0.0)
def get_yahoo_stat_json_obj():
    """Fetch Yahoo's weekly NFL stats page and return its embedded JSON state.

    The page carries a JSON document in a ``root.App.main = ...;`` script
    assignment. The text between that prefix and the closing
    ``;\\n}(this));`` marker is extracted and decoded.

    Returns:
        The decoded JSON document.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If either marker is missing from the page, or the
            extracted text is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    url = "https://sports.yahoo.com/nfl/stats/weekly/?selectedTable=0"
    start_str = """root.App.main = """
    end_str = """;\n}(this));"""

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail here with the HTTP status instead of later with a confusing JSON
    # error caused by parsing an error page.
    response.raise_for_status()

    _, start_found, remainder = response.text.partition(start_str)
    dom_str, end_found, _ = remainder.partition(end_str)
    if not (start_found and end_found):
        raise ValueError(f"could not locate the embedded stats JSON in the response from {url}")
    return json.loads(dom_str)
def get_yahoo_stats(season: str = "2023") -> dict[int, dict[str, dict[str, float]]]:
    """Build the playoff stat map from Yahoo's weekly stats page.

    Args:
        season: Key under which Yahoo nests each stat table's weekly data.
            Defaults to ``"2023"``.

    Returns:
        Mapping of playoff week -> player id -> stat key -> value, with an
        entry for every playoff week in ``NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK``
        (possibly empty).

    Raises:
        KeyError: If the page's JSON lacks one of the expected stores or has
            no data for ``season``.
    """
    dom_json = get_yahoo_stat_json_obj()
    stat_map: dict[int, dict[str, dict[str, float]]] = {w: {} for w in NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK.values()}
    stats_json = dom_json["context"]["dispatcher"]["stores"]["GraphStatsStore"]

    # (store name in the Yahoo payload, table key in YAHOO_TO_STAT_MAP), in the
    # order the tables are applied. The returns store
    # (weeklyStatsFootballReturns) is not read: YAHOO_TO_STAT_MAP has no table
    # for it.
    store_to_stat_type = (
        ("weeklyStatsFootballPassing", "PASSING"),
        ("weeklyStatsFootballRushing", "RUSHING"),
        ("weeklyStatsFootballReceiving", "RECEIVING"),
        ("weeklyStatsFootballKicking", "KICKING"),
        ("weeklyStatsFootballDefense", "DEFENSE"),
    )
    for store_name, yahoo_stat_type in store_to_stat_type:
        # NOTE(review): "200" is a fixed path segment in Yahoo's payload; its
        # meaning is not evident from this file.
        weekly_stats = stats_json[store_name]["nfl"]["200"][season]
        add_yahoo_stat_type_to_stat_map(weekly_stats, yahoo_stat_type, stat_map)
    return stat_map
@st.cache_data(ttl=STAT_CACHE_SECONDS)
def get_stats_map() -> dict[int, dict[str, dict[str, float]]]:
    """Combine live, nflverse and manually overridden stats into one map.

    Sources are layered in this order, later ones winning:

    1. live stats (``get_live_stats``);
    2. nflverse stats, which replace a player's whole entry for the week;
    3. stat overrides (``get_stat_overrides``), which replace individual
       stat values and leave the player's other stats untouched.

    Returns:
        Mapping of playoff week -> player id -> stat key -> value.

    Raises:
        KeyError: If nflverse or override data names a week that the live
            stat map does not contain.
    """
    # use live stats if available
    combined = get_live_stats()

    # use more permanent nflverse stats over live: a player's nflverse entry
    # replaces the live entry for the same week wholesale
    permanent = assemble_nflverse_stats()
    for week, permanent_week in permanent.items():
        if permanent_week:
            combined[week].update(permanent_week)

    # for stat overrides, override at the stat level
    overrides = get_stat_overrides()
    for week, override_week in overrides.items():
        for player_id, override_stats in override_week.items():
            if not override_stats:
                # nothing to apply, so no (empty) player entry is created
                continue
            combined[week].setdefault(player_id, {}).update(override_stats)
    return combined
@st.cache_data(ttl=STAT_CACHE_SECONDS)
def get_scores_map() -> dict[int, dict[str, float]]:
    """Score every player for every playoff week.

    A player's score is the sum, over the player's stats, of the stat value
    multiplied by the ``score`` of the matching ``StatType``.

    Returns:
        Mapping of playoff week -> player id -> score, with an entry for
        every playoff week in ``NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK`` (possibly
        empty).

    Raises:
        KeyError: If a stat key is not registered in ``STAT_KEY_MAP``, or the
            stat map holds players for a week outside the playoff weeks.
    """

    def _points(stat_values: dict[str, float]) -> float:
        # Accumulate left to right from 0.0 so a player without stats scores 0.0.
        total = 0.0
        for key, amount in stat_values.items():
            total += STAT_KEY_MAP[key].score * amount
        return total

    scores_map: dict[int, dict[str, float]] = {}
    for playoff_week in NFLVERSE_STAT_WEEK_TO_PLAYOFF_WEEK.values():
        scores_map[playoff_week] = {}

    for week, players in get_stats_map().items():
        for player_id, stat_values in players.items():
            scores_map[week][player_id] = _points(stat_values)
    return scores_map