"""Monte Carlo simulation of the unplayed weeks of a season.

Scores for unplayed weeks are drawn from a normal distribution fitted to the
completed weeks; each simulated season is ranked, and the per-simulation
results are collected into one wide DataFrame for later summarising.
"""
import numbers
from typing import Dict, List, Mapping, MutableMapping, Tuple

import numpy as np
import pandas as pd

try:
    from stqdm import stqdm
except ImportError:
    # The progress bar is purely cosmetic; fall back to a pass-through so the
    # simulation code stays importable where stqdm is not installed.
    def stqdm(iterable, *args, **kwargs):
        """Return ``iterable`` unchanged (no-op stand-in for ``stqdm``)."""
        return iterable


def simulate_game(team_name: str, mean_points: float, std_points: float) -> float:
    """Draw one simulated score from ``Normal(mean_points, std_points)``.

    Args:
        team_name: Accepted for interface compatibility; not used in the draw.
        mean_points: Mean of the normal distribution.
        std_points: Standard deviation of the normal distribution.

    Returns:
        The sampled score rounded to 3 decimal places.
    """
    return np.round(np.random.normal(mean_points, std_points), 3)


def simulate_week_matchups(
    df_week: pd.DataFrame, mean_points: float, std_points: float
) -> pd.DataFrame:
    """Simulate every game in one week and flag the winner of each match.

    ``df_week`` is modified in place (and also returned): ``team_points`` is
    overwritten with simulated scores and ``win_probability`` is set to 1.0
    for rows holding the highest score of their ``match_index`` group and 0.0
    otherwise. Tied top scores are all flagged 1.0.

    Args:
        df_week: Rows of a single week; must have ``team_name`` and
            ``match_index`` columns.
        mean_points: Mean of the score distribution.
        std_points: Standard deviation of the score distribution.

    Returns:
        The same ``df_week`` object, updated.
    """
    points = (
        df_week["team_name"]
        .apply(lambda name: simulate_game(name, mean_points, std_points))
        .to_numpy()
    )
    # Plain column assignment replaces the column outright, so a pre-existing
    # placeholder column of another dtype cannot cause an in-place cast.
    df_week["team_points"] = points
    best_in_match = (
        df_week.groupby("match_index")["team_points"].transform("max").to_numpy()
    )
    df_week["win_probability"] = (points == best_in_match) * 1.0
    return df_week


def simulate_remaining_season(
    df_completed_weeks: pd.DataFrame, df_incomplete_weeks: pd.DataFrame
) -> pd.DataFrame:
    """Simulate all incomplete weeks and append them to the completed ones.

    The score distribution is the mean / standard deviation of
    ``team_points`` over the completed weeks. Neither input is modified.

    Args:
        df_completed_weeks: Played rows; must have ``team_points``.
        df_incomplete_weeks: Unplayed rows; must have ``week``, ``team_name``
            and ``match_index``.

    Returns:
        A new DataFrame: completed rows followed by the simulated weeks.
    """
    mean_points = df_completed_weeks["team_points"].mean()
    std_points = df_completed_weeks["team_points"].std()
    simulated_weeks = [
        # Copy each group: simulate_week_matchups writes into its argument and
        # frames produced by groupby iteration must not be mutated directly.
        simulate_week_matchups(df_week.copy(), mean_points, std_points)
        for _, df_week in df_incomplete_weeks.groupby("week")
    ]
    return pd.concat([df_completed_weeks] + simulated_weeks)


def summarize_season(df_sim: pd.DataFrame, n_bye: int, n_playoff: int) -> pd.DataFrame:
    """Rank teams for one simulated season.

    Teams are ordered by summed ``win_probability`` and then summed
    ``team_points``, both descending.

    Args:
        df_sim: One row per team per game with ``team_name``,
            ``win_probability`` and ``team_points``.
        n_bye: Number of top-ranked teams flagged in the ``bye`` column.
        n_playoff: Number of top-ranked teams flagged in ``playoff``.

    Returns:
        DataFrame indexed by ``team_name`` with the two summed columns plus
        ``season_rank`` (1 = best), ``bye`` and ``playoff`` (0/1 flags).
    """
    sim_sum = (
        df_sim.groupby("team_name")[["win_probability", "team_points"]]
        .sum()
        .sort_values(["win_probability", "team_points"], ascending=False)
    )
    sim_sum["season_rank"] = range(1, 1 + len(sim_sum))
    sim_sum["bye"] = (sim_sum["season_rank"] <= n_bye) * 1
    sim_sum["playoff"] = (sim_sum["season_rank"] <= n_playoff) * 1
    return sim_sum


def finalize_all(df: pd.DataFrame) -> None:
    """Set ``win_probability`` in place to 1 for each match's top scorer.

    A row gets 1 when its ``team_points`` equals the maximum within its
    (``week``, ``match_index``) group, else 0.
    """
    match_best = df.groupby(["week", "match_index"]).team_points.transform("max")
    df["win_probability"] = (match_best == df.team_points) * 1


def run_simulations(
    df: pd.DataFrame, complete_weeks: int, n_sims: int, n_playoff: int
) -> pd.DataFrame:
    """Run ``n_sims`` simulations of the weeks after ``complete_weeks``.

    Args:
        df: Schedule/results with ``week``, ``match_index``, ``team_name`` and
            ``team_points`` columns. It is not modified.
        complete_weeks: Weeks ``<=`` this value are treated as played.
        n_sims: Number of simulated seasons.
        n_playoff: Number of playoff places. Two byes are awarded when this
            is 6, otherwise none.

    Returns:
        One row per simulation (index = simulation number). Columns keyed
        ``(variable, team_name)`` hold each team's ``bye``, ``playoff``,
        ``season_rank`` and ``team_points``; columns keyed
        ``(week, match_index)`` hold the winning team's name for each
        simulated match.
    """
    n_bye = 2 if n_playoff == 6 else 0

    # Explicit copy: finalize_all writes a column, which must not target a
    # slice of the caller's frame.
    df_comp = df[df.week <= complete_weeks].copy()
    finalize_all(df_comp)
    df_inc = df[df.week > complete_weeks]

    summary_cols = ["team_name", "bye", "playoff", "season_rank", "team_points"]
    sim_list = []
    for i in stqdm(range(n_sims)):
        df_sim = simulate_remaining_season(df_comp, df_inc)
        sim_sum = summarize_season(df_sim, n_bye, n_playoff)

        # Winner of every simulated match: the row with the highest score.
        df_simmed = df_sim[df_sim.week > complete_weeks]
        win_dict = {
            match_key: df_match.sort_values("team_points").team_name.iloc[-1]
            for match_key, df_match in df_simmed.groupby(["week", "match_index"])
        }
        df_wins = pd.DataFrame(win_dict, index=[i])

        # Flatten the per-team summary into one row keyed (variable, team).
        df_melt = (
            sim_sum.reset_index()[summary_cols]
            .melt(id_vars="team_name")
            .sort_values(["variable", "team_name"])
        )
        team_values = {
            (variable, team): value
            for variable, team, value in zip(
                df_melt["variable"], df_melt["team_name"], df_melt["value"]
            )
        }
        df_team_sum = pd.DataFrame(team_values, index=[i])

        sim_list.append(df_team_sum.join(df_wins))

    return pd.concat(sim_list)


def create_simulate_summary(sims: pd.DataFrame) -> pd.DataFrame:
    """Summarise bye / playoff frequencies across simulations.

    Args:
        sims: Output of ``run_simulations``.

    Returns:
        DataFrame indexed by team with ``bye`` and ``playoffs`` columns, sorted
        by playoffs then bye (descending) and formatted as percentage strings
        such as ``"12.50%"``.
    """
    df_sim_sum = pd.DataFrame(
        {"bye": sims["bye"].mean(), "playoffs": sims["playoff"].mean()}
    )
    # Sort on the numeric values first; formatting turns them into strings.
    return df_sim_sum.sort_values(["playoffs", "bye"], ascending=False).map(
        "{:,.2%}".format
    )


def get_matches_by_team_from_sims_df(
    sims: pd.DataFrame,
) -> Mapping[str, List[Tuple[int, int]]]:
    """Map each team to the simulated matches it won at least once.

    Match columns are recognised by a numeric first element in the column key
    (the week number); summary columns have a string there.

    Args:
        sims: Output of ``run_simulations``.

    Returns:
        ``{team_name: [match_column_key, ...]}``.
    """
    team_matches: MutableMapping[str, List[Tuple[int, int]]] = {}
    for col in sims.columns:
        # numbers.Number also accepts NumPy integer scalars (e.g. np.int64),
        # which are not instances of the builtin int.
        if not isinstance(col[0], numbers.Number):
            continue
        for team in sims[col].unique():
            team_matches.setdefault(team, []).append(col)
    return team_matches


def calc_wins_on_scenario(
    team_name: str, match_cols_list: List[Tuple[int, int]], sims_df: pd.DataFrame
) -> Dict[str, List[float]]:
    """Compute playoff / bye odds for every win-loss combination of a team.

    Each scenario is a binary string with one digit per entry of
    ``match_cols_list`` (``"1"`` = ``team_name`` won that match).

    Args:
        team_name: Team whose outcomes are enumerated.
        match_cols_list: Keys of the match-winner columns in ``sims_df``.
        sims_df: Output of ``run_simulations``.

    Returns:
        ``{scenario: [n_matching_sims, playoff_odds, bye_odds]}`` with odds
        rounded to 3 decimals; scenarios with no matching simulation report
        zeros (NaN is converted to 0).
    """
    n_matches = len(match_cols_list)
    # Hoisted out of the 2**n scenario loop: these do not depend on the scenario.
    won_match = [
        (sims_df[match] == team_name).to_numpy() for match in match_cols_list
    ]
    playoff_flags = sims_df["playoff"][team_name]
    bye_flags = sims_df["bye"][team_name]

    scenario_bye_playoff_results = {}
    for i in range(2**n_matches):
        binary_scenario = format(i, f"0{n_matches}b")
        selected = np.ones(len(sims_df), dtype=bool)
        for digit, team_won in zip(binary_scenario, won_match):
            selected &= team_won == (digit == "1")

        playoff_odds = playoff_flags[selected].mean()
        bye_odds = bye_flags[selected].mean()
        scenario_bye_playoff_results[binary_scenario] = np.nan_to_num(
            [int(selected.sum()), round(playoff_odds, 3), round(bye_odds, 3)]
        ).tolist()
    return scenario_bye_playoff_results


def calculate_scenario_probabilities(sims: pd.DataFrame) -> Mapping:
    """Compute scenario odds for every team appearing as a match winner.

    Args:
        sims: Output of ``run_simulations``.

    Returns:
        ``{team_name: calc_wins_on_scenario(team_name, its_matches, sims)}``.
    """
    remaining_matches = get_matches_by_team_from_sims_df(sims)
    return {
        team: calc_wins_on_scenario(team, matches, sims)
        for team, matches in remaining_matches.items()
    }