import pandas as pd | |
from domain.playoffs import SCHEDULE_WEEK_TO_PLAYOFF_WEEK | |
def get_full_schedule(season_int: str | int) -> pd.DataFrame:
    """Download the games table for one season from Pro Football Reference.

    Args:
        season_int: Season identifier interpolated into the page URL.

    Returns:
        The first table parsed from the page, without the rows whose
        ``Week`` value is the literal string ``"Week"`` (header rows that
        are repeated inside the table body).
    """
    games_url = f"https://www.pro-football-reference.com/years/{season_int}/games.htm#games"
    parsed_tables = pd.read_html(games_url)
    schedule = parsed_tables[0]
    # Repeated header rows carry the column name itself in the Week column.
    is_real_game_row = schedule.Week != "Week"
    return schedule[is_real_game_row]
def get_week_team_time_map(df_schedule: pd.DataFrame) -> dict[int, dict[str, pd.Timestamp]]:
    """Build a per-week lookup from team name to game timestamp.

    Args:
        df_schedule: Schedule table providing the ``Week``, ``Date``,
            ``Time``, ``Winner/tie`` and ``Loser/tie`` columns.

    Returns:
        A dict with one entry per value of ``SCHEDULE_WEEK_TO_PLAYOFF_WEEK``.
        Each entry maps both teams of every game in that week to the
        timestamp parsed from the row's date and time.
    """
    # Start every mapped week with an empty dict so weeks with no rows
    # still appear in the result.
    kickoff_by_week: dict[int, dict[str, pd.Timestamp]] = {}
    for playoff_week in SCHEDULE_WEEK_TO_PLAYOFF_WEEK.values():
        kickoff_by_week[playoff_week] = {}

    for _, game in df_schedule.iterrows():
        playoff_week = SCHEDULE_WEEK_TO_PLAYOFF_WEEK.get(game.Week)
        # Truthiness check: rows whose week is absent from the mapping are
        # skipped, and so is any week mapped to a falsy value (e.g. 0).
        if not playoff_week:
            continue

        kickoff = pd.to_datetime(game.Date + " " + game.Time, yearfirst=True)
        teams_this_week = kickoff_by_week[playoff_week]
        # Both participants share the same kickoff time.
        for team_column in ("Winner/tie", "Loser/tie"):
            teams_this_week[game[team_column]] = kickoff

    return kickoff_by_week
def get_season_time_map(season_int: str | int) -> dict[int, dict[str, pd.Timestamp]]:
    """Fetch a season's schedule and convert it to a week/team/time lookup.

    Args:
        season_int: Season identifier passed through to ``get_full_schedule``.

    Returns:
        The result of ``get_week_team_time_map`` applied to the schedule
        downloaded for that season.
    """
    return get_week_team_time_map(get_full_schedule(season_int))