File size: 1,115 Bytes
7fd5387
 
62740e0
 
7fd5387
 
 
 
 
 
 
 
 
 
13801e8
62740e0
7fd5387
62740e0
 
 
 
7fd5387
 
 
13801e8
7fd5387
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pandas as pd

from domain.playoffs import SCHEDULE_WEEK_TO_PLAYOFF_WEEK


def get_full_schedule(season_int: str | int) -> pd.DataFrame:
    """Download the games table for one season from pro-football-reference.

    Args:
        season_int: Season year interpolated into the page URL.

    Returns:
        The first HTML table found on the page, with rows whose ``Week``
        cell is the literal string ``"Week"`` (repeated header rows) dropped.
    """
    games_url = f"https://www.pro-football-reference.com/years/{season_int}/games.htm#games"
    tables = pd.read_html(games_url)
    schedule = tables[0]

    # Header rows are repeated inside the table body; keep only the other rows.
    is_game_row = schedule.Week != "Week"
    return schedule[is_game_row]


def get_week_team_time_map(df_schedule: pd.DataFrame) -> dict[int, dict[str, pd.Timestamp]]:
    """Build a per-week lookup from team to game time.

    Args:
        df_schedule: Schedule table with ``Week``, ``Date``, ``Time``,
            ``Winner/tie`` and ``Loser/tie`` columns.

    Returns:
        A dict keyed by every value of ``SCHEDULE_WEEK_TO_PLAYOFF_WEEK``. Each
        entry maps the ``Winner/tie`` and ``Loser/tie`` values of the matching
        rows to the timestamp parsed from that row's ``Date`` and ``Time``.
        Weeks with no matching rows keep an empty dict.
    """
    week_team_time_map: dict[int, dict[str, pd.Timestamp]] = {
        week: {} for week in SCHEDULE_WEEK_TO_PLAYOFF_WEEK.values()
    }
    for _, row in df_schedule.iterrows():
        mapped_week = SCHEDULE_WEEK_TO_PLAYOFF_WEEK.get(row.Week)
        # Compare against None instead of relying on truthiness so that a
        # week mapped to 0 is not silently skipped.
        if mapped_week is None:
            continue
        game_time = pd.to_datetime(row.Date + " " + row.Time, yearfirst=True)
        # Both teams in a game share the same game time.
        week_team_time_map[mapped_week][row["Winner/tie"]] = game_time
        week_team_time_map[mapped_week][row["Loser/tie"]] = game_time
    return week_team_time_map


def get_season_time_map(season_int: str | int) -> dict[int, dict[str, pd.Timestamp]]:
    """Fetch a season's schedule and turn it into the week/team time map.

    Args:
        season_int: Season identifier passed through to ``get_full_schedule``.

    Returns:
        The result of ``get_week_team_time_map`` applied to that schedule.
    """
    return get_week_team_time_map(get_full_schedule(season_int))