import pandas as pd

from domain.playoffs import SCHEDULE_WEEK_TO_PLAYOFF_WEEK
from domain.teams import PFR_NAME_TO_SCHEDULE_NAME_MAP


def get_full_schedule(season_int: str | int) -> pd.DataFrame:
    """Fetch a season's games table from Pro Football Reference.

    Args:
        season_int: Season year, interpolated into the page URL.

    Returns:
        The first table parsed from the season's games page, with the
        repeated in-table header rows removed.
    """
    games_url = f"https://www.pro-football-reference.com/years/{season_int}/games.htm#games"
    tables = pd.read_html(games_url)
    schedule = tables[0]

    # Extra header rows inside the table carry the literal text "Week" in
    # the Week column; keep everything else.
    is_real_row = schedule.Week != "Week"
    return schedule[is_real_row]


def get_season_game_map(
    season_int: str | int,
) -> tuple[dict[int, dict[str, dict[str, str | int | pd.Timestamp]]], dict[int, pd.Timestamp]]:
    """Build per-week game lookups for the weeks in SCHEDULE_WEEK_TO_PLAYOFF_WEEK.

    Args:
        season_int: Season year, forwarded to ``get_full_schedule``.

    Returns:
        A pair ``(week_team_time_map, last_game_week_map)``:

        * ``week_team_time_map`` maps each mapped week to a dict keyed by
          team name. Every team entry has ``"gametime"`` and ``"opponent"``;
          when both point totals are present as strings it also has
          ``"score"``, ``"opponent_score"`` and ``"status"``
          (``"Win"`` or ``"Loss"``).
        * ``last_game_week_map`` maps each mapped week that has at least one
          game to the latest game time seen for that week.

    Raises:
        KeyError: If a team name in the schedule is missing from
            ``PFR_NAME_TO_SCHEDULE_NAME_MAP``.
    """
    schedule = get_full_schedule(season_int)

    # Pre-seed every mapped week so weeks without games still appear (empty).
    week_team_time_map: dict[int, dict[str, dict[str, str | int | pd.Timestamp]]] = {}
    for known_week in SCHEDULE_WEEK_TO_PLAYOFF_WEEK.values():
        week_team_time_map[known_week] = {}

    last_game_week_map: dict[int, pd.Timestamp] = {}

    for _, game in schedule.iterrows():
        mapped_week = SCHEDULE_WEEK_TO_PLAYOFF_WEEK.get(game.Week)
        # NOTE(review): this is a truthiness check, so a mapped week of 0
        # would be skipped just like an unmapped week — confirm no week maps to 0.
        if not mapped_week:
            continue

        # NOTE(review): "EST" is presumably a fixed-offset zone (no DST) —
        # confirm that is intended for these kickoff times.
        naive_kickoff = pd.to_datetime(game.Date + " " + game.Time, yearfirst=True)
        kickoff = naive_kickoff.tz_localize("EST")

        # Track the latest kickoff seen so far for this week.
        latest_so_far = last_game_week_map.get(mapped_week)
        if mapped_week in last_game_week_map:
            last_game_week_map[mapped_week] = max(kickoff, latest_so_far)
        else:
            last_game_week_map[mapped_week] = kickoff

        # These columns only reflect the actual winner/loser once the game
        # has already happened.
        winner_team = PFR_NAME_TO_SCHEDULE_NAME_MAP[game["Winner/tie"]]
        loser_team = PFR_NAME_TO_SCHEDULE_NAME_MAP[game["Loser/tie"]]

        winner_entry = {"gametime": kickoff, "opponent": loser_team}
        loser_entry = {"gametime": kickoff, "opponent": winner_team}

        # Scores are only recorded when both point cells are strings.
        has_scores = all(isinstance(game[column], str) for column in ("PtsW", "PtsL"))
        if has_scores:
            winner_points = int(game["PtsW"])
            loser_points = int(game["PtsL"])
            winner_entry.update(
                score=winner_points,
                opponent_score=loser_points,
                status="Win",
            )
            loser_entry.update(
                score=loser_points,
                opponent_score=winner_points,
                status="Loss",
            )

        games_this_week = week_team_time_map[mapped_week]
        games_this_week[winner_team] = winner_entry
        games_this_week[loser_team] = loser_entry

    return week_team_time_map, last_game_week_map