import pandas as pd

from domain.playoffs import SCHEDULE_WEEK_TO_PLAYOFF_WEEK


def get_full_schedule(season_int: str | int) -> pd.DataFrame:
    """Fetch the games table for a season from pro-football-reference.com.

    Args:
        season_int: Season year, interpolated into the URL path as-is.

    Returns:
        The first HTML table found on the season's games page, with the
        repeated in-table header rows (rows whose ``Week`` cell is the literal
        string ``"Week"``) removed.
    """
    url = f"https://www.pro-football-reference.com/years/{season_int}/games.htm#games"
    df = pd.read_html(url)[0]
    # remove extra header rows in table
    df = df[df.Week != "Week"]
    return df


def get_week_team_time_map(df_schedule: pd.DataFrame) -> dict[int, dict[str, pd.Timestamp]]:
    """Build a per-week lookup of each team's game start time.

    Args:
        df_schedule: Schedule table providing the ``Week``, ``Date``, ``Time``,
            ``Winner/tie`` and ``Loser/tie`` columns.

    Returns:
        A dict with one entry per value of ``SCHEDULE_WEEK_TO_PLAYOFF_WEEK``.
        Each entry maps a team (taken from the ``Winner/tie`` and ``Loser/tie``
        columns) to the timestamp parsed from that row's ``Date`` and ``Time``.
        Weeks with no matching rows map to an empty dict.
    """
    # Pre-seed every mapped week so callers always find a key, even when the
    # schedule contains no rows for that week.
    week_team_time_map: dict[int, dict[str, pd.Timestamp]] = {
        k: {} for k in SCHEDULE_WEEK_TO_PLAYOFF_WEEK.values()
    }
    for _, row in df_schedule.iterrows():
        # Rows whose Week is absent from the mapping (or maps to a falsy
        # value) are skipped.
        if mapped_week := SCHEDULE_WEEK_TO_PLAYOFF_WEEK.get(row.Week):
            game_time = pd.to_datetime(row.Date + " " + row.Time, yearfirst=True)
            # Both participants share the same start time.
            week_team_time_map[mapped_week][row["Winner/tie"]] = game_time
            week_team_time_map[mapped_week][row["Loser/tie"]] = game_time
    return week_team_time_map


def get_season_time_map(season_int: str | int) -> dict[int, dict[str, pd.Timestamp]]:
    """Fetch a season's schedule and return its week -> team -> game-time map.

    Args:
        season_int: Season year, passed through to ``get_full_schedule``.

    Returns:
        The result of ``get_week_team_time_map`` applied to the fetched
        schedule.
    """
    df_schedule = get_full_schedule(season_int)
    return get_week_team_time_map(df_schedule)