Spaces:
Sleeping
Sleeping
from typing import List, Mapping, MutableMapping, Tuple

import numpy as np
import pandas as pd
from stqdm import stqdm
def simulate_game(team_name: str, mean_points: float, std_points: float) -> float:
    """Draw one simulated score from a normal distribution.

    Args:
        team_name: Name of the team being simulated. It does not influence the
            draw; every team is sampled from the same distribution.
        mean_points: Mean of the normal distribution.
        std_points: Standard deviation of the normal distribution.

    Returns:
        The sampled score, rounded to three decimal places.
    """
    return np.round(np.random.normal(mean_points, std_points), 3)
def simulate_week_matchups(df_week: pd.DataFrame, mean_points: float, std_points: float) -> pd.DataFrame:
    """Simulate scores and winners for every matchup of a single week.

    One score per row is drawn from a normal distribution and rounded to three
    decimals. Within each ``match_index`` group, the row(s) holding the highest
    score get ``win_probability`` 1.0 and the others 0.0 (tied rows all get 1.0).

    Args:
        df_week: Rows of one week; must contain a ``match_index`` column.
        mean_points: Mean of the score distribution.
        std_points: Standard deviation of the score distribution.

    Returns:
        A copy of ``df_week`` with ``team_points`` and ``win_probability`` set.
        The input frame is left untouched.
    """
    # Work on a copy: the caller typically passes a groupby slice, and writing
    # into it in place triggers SettingWithCopy issues.
    simulated = df_week.copy()

    # Same distribution and rounding as simulate_game, drawn for all rows at once.
    scores = np.random.normal(mean_points, std_points, size=len(simulated))
    simulated["team_points"] = np.round(scores, 3)

    best_in_match = simulated.groupby("match_index")["team_points"].transform("max")
    simulated["win_probability"] = (simulated["team_points"] == best_in_match).astype(float)
    return simulated
def simulate_remaining_season(df_completed_weeks: pd.DataFrame, df_incomplete_weeks: pd.DataFrame) -> pd.DataFrame:
    """Simulate every unfinished week and append it to the completed weeks.

    The score distribution used for the simulation is parameterised by the mean
    and standard deviation of ``team_points`` over the completed weeks.

    Args:
        df_completed_weeks: Rows for weeks already played; must contain
            ``team_points``.
        df_incomplete_weeks: Rows for weeks still to be played; must contain
            ``week``.

    Returns:
        A new frame: the completed rows followed by one simulated block per
        remaining week. Neither input frame is modified.
    """
    completed = df_completed_weeks.copy()
    remaining = df_incomplete_weeks.copy()

    mean_points = completed["team_points"].mean()
    std_points = completed["team_points"].std()

    simulated_weeks = [
        simulate_week_matchups(week_rows, mean_points, std_points)
        for _, week_rows in remaining.groupby("week")
    ]
    return pd.concat([completed, *simulated_weeks])
def summarize_season(df_sim: pd.DataFrame, n_bye: int, n_playoff: int) -> pd.DataFrame:
    """Aggregate a full season into per-team standings.

    Teams are ranked by total ``win_probability`` (wins) and then by total
    ``team_points``, both descending.

    Args:
        df_sim: Season rows with ``team_name``, ``win_probability`` and
            ``team_points`` columns.
        n_bye: Number of top-ranked teams flagged with a bye.
        n_playoff: Number of top-ranked teams flagged as making the playoffs.

    Returns:
        A frame indexed by ``team_name`` with the summed ``win_probability`` and
        ``team_points``, a 1-based ``season_rank``, and 0/1 ``bye`` and
        ``playoff`` flags.
    """
    totals = df_sim.groupby("team_name")[["win_probability", "team_points"]].sum()
    standings = totals.sort_values(["win_probability", "team_points"], ascending=False)

    standings["season_rank"] = range(1, len(standings) + 1)
    # Multiplying by 1 turns the boolean masks into 0/1 integers.
    standings["bye"] = (standings["season_rank"] <= n_bye) * 1
    standings["playoff"] = (standings["season_rank"] <= n_playoff) * 1
    return standings
def finalize_all(df: pd.DataFrame) -> None:
    """Set ``win_probability`` in place from the recorded scores.

    For every (``week``, ``match_index``) group, rows whose ``team_points``
    equals the group maximum get 1 and all other rows get 0. Tied rows all
    get 1.

    Args:
        df: Frame with ``week``, ``match_index`` and ``team_points`` columns.
            It is modified in place; nothing is returned.
    """
    match_best = df.groupby(["week", "match_index"])["team_points"].transform("max")
    df["win_probability"] = (match_best == df["team_points"]) * 1
def run_simulations(df: pd.DataFrame, complete_weeks: int, n_sims: int, n_playoff: int) -> pd.DataFrame:
    """Run ``n_sims`` simulations of the remaining season.

    Args:
        df: Full schedule with ``week``, ``match_index``, ``team_name`` and
            ``team_points`` columns.
        complete_weeks: Last week that has already been played; later weeks are
            simulated.
        n_sims: Number of simulations to run.
        n_playoff: Number of teams that make the playoffs. Two byes are awarded
            when this is 6, otherwise none.

    Returns:
        One row per simulation (indexed by simulation number). Columns keyed by
        ``(variable, team_name)`` hold ``bye``, ``playoff``, ``season_rank`` and
        ``team_points`` per team; columns keyed by ``(week, match_index)`` hold
        the name of the team that won that simulated match.
    """
    n_bye = 2 if n_playoff == 6 else 0

    # Copy before finalize_all writes into it: a boolean-mask selection of `df`
    # is not guaranteed to be independent of the caller's frame.
    df_comp = df[df.week <= complete_weeks].copy()
    finalize_all(df_comp)
    df_inc = df[df.week > complete_weeks]

    sim_list = []
    # stqdm wraps the iterable (presumably to render a progress bar).
    for i in stqdm(range(n_sims)):
        df_sim = simulate_remaining_season(df_comp, df_inc)
        sim_sum = summarize_season(df_sim, n_bye, n_playoff)

        # Winner of every simulated match: the team on the highest-scoring row.
        df_simmed = df_sim[df_sim.week > complete_weeks]
        win_dict = {
            match_key: df_match.sort_values("team_points").team_name.iloc[-1]
            for (match_key, df_match) in df_simmed.groupby(["week", "match_index"])
        }
        df_wins = pd.DataFrame(win_dict, index=[i])

        # Flatten the standings into a single row keyed by (variable, team_name).
        df_melt = (
            sim_sum.reset_index()[["team_name", "bye", "playoff", "season_rank", "team_points"]]
            .melt(id_vars="team_name")
            .sort_values(["variable", "team_name"])
        )
        team_values = {
            (variable, team): value
            for variable, team, value in zip(df_melt["variable"], df_melt["team_name"], df_melt["value"])
        }
        df_team_sum = pd.DataFrame(team_values, index=[i])

        sim_list.append(df_team_sum.join(df_wins))

    return pd.concat(sim_list)
def create_simulate_summary(sims: pd.DataFrame) -> pd.DataFrame:
    """Summarise bye and playoff frequencies across simulations.

    Args:
        sims: Simulation results whose ``bye`` and ``playoff`` column groups
            hold one 0/1 column per team and one row per simulation.

    Returns:
        A frame indexed by team with ``bye`` and ``playoffs`` columns, sorted by
        playoff then bye frequency (descending) and formatted as percentage
        strings with two decimals.
    """
    odds = pd.DataFrame(
        {
            "bye": sims["bye"].mean(),
            "playoffs": sims["playoff"].mean(),
        }
    )
    # Sort on the numeric values first; formatting turns them into strings.
    ranked = odds.sort_values(["playoffs", "bye"], ascending=False)
    # Per-column Series.map instead of DataFrame.map, which only exists in
    # pandas >= 2.1.
    return ranked.apply(lambda column: column.map("{:,.2%}".format))
def get_matches_by_team_from_sims_df(sims: pd.DataFrame) -> Mapping[str, List[Tuple[int, ...]]]:
    """Map each team to the simulated-match columns in which it appears as winner.

    A column is treated as a match column when the first element of its label
    is numeric; columns whose label starts with a string are skipped.

    Args:
        sims: Simulation results with tuple column labels.

    Returns:
        A mapping from team name to the list of match column labels in which
        that team is the recorded winner in at least one simulation.
    """
    team_matches: MutableMapping[str, List[Tuple[int, ...]]] = {}
    for col in sims.columns:
        # Accept numpy scalar labels too: np.int64 is not a subclass of int.
        if not isinstance(col[0], (int, float, np.integer, np.floating)):
            continue
        for team in sims[col].unique():
            team_matches.setdefault(team, []).append(col)
    return team_matches
def calc_wins_on_scenario(
    team_name: str,
    match_cols_list: List[Tuple[int, ...]],
    sims_df: pd.DataFrame,
) -> Mapping[str, List[float]]:
    """Compute playoff and bye odds for every win/loss combination of a team.

    Each scenario is a binary string with one character per entry of
    ``match_cols_list`` (in order): ``"1"`` means the team is the recorded
    winner of that match column, ``"0"`` means it is not.

    Args:
        team_name: Team whose scenarios are evaluated.
        match_cols_list: Column labels of the matches to enumerate. The number
            of scenarios is ``2 ** len(match_cols_list)``.
        sims_df: Simulation results containing the match columns plus
            ``playoff`` and ``bye`` column groups with a column for the team.

    Returns:
        A mapping from scenario string to
        ``[n_matching_sims, playoff_odds, bye_odds]``, with odds rounded to
        three decimals. Scenarios matched by no simulation report 0.0 odds.
    """
    n_matches = len(match_cols_list)

    # The per-match "team won" masks do not depend on the scenario, so build
    # them once instead of once per scenario.
    won_masks = [(sims_df[match] == team_name).to_numpy() for match in match_cols_list]

    scenario_bye_playoff_results = {}
    for i in range(2**n_matches):
        binary_scenario = format(i, f"0{n_matches}b")

        keep = np.ones(len(sims_df), dtype=bool)
        for outcome, won in zip(binary_scenario, won_masks):
            keep &= won if outcome == "1" else ~won

        filtered_sims = sims_df[keep]
        playoff_odds = filtered_sims["playoff"][team_name].mean()
        bye_odds = filtered_sims["bye"][team_name].mean()

        # The mean of an empty selection is NaN; nan_to_num reports it as 0.
        scenario_bye_playoff_results[binary_scenario] = np.nan_to_num(
            [len(filtered_sims), round(playoff_odds, 3), round(bye_odds, 3)]
        ).tolist()
    return scenario_bye_playoff_results
def calculate_scenario_probabilities(sims: pd.DataFrame) -> Mapping:
    """Compute scenario-based playoff and bye odds for every team.

    Args:
        sims: Simulation results, one row per simulation.

    Returns:
        A mapping from team name to that team's scenario results, as produced
        by ``calc_wins_on_scenario`` for the match columns that
        ``get_matches_by_team_from_sims_df`` associates with the team.
    """
    matches_by_team = get_matches_by_team_from_sims_df(sims)
    return {
        team: calc_wins_on_scenario(team, matches, sims)
        for team, matches in matches_by_team.items()
    }