YFDashboard / src /analyze_yahoo.py
Jon Solow
Fix against_all_wins calc
71c2c20
raw
history blame
3.13 kB
import pandas as pd
from typing import Optional
def _add_luck_columns(df_week: pd.DataFrame, n_teams: int) -> pd.DataFrame:
    """Return a copy of one week's rows with the luck metric columns added."""
    out = df_week.copy()
    # rank() is 1-based and ascending, so (rank - 1) / (n_teams - 1) is the
    # share of the other teams this team outscored that week.
    out["against_all_wins"] = ((out["team_points"].rank() - 1) / (n_teams - 1)).round(2)
    out["against_all_losses"] = 1 - out["against_all_wins"]
    # A "half win" is credited for beating at least half of the other teams.
    out["half_wins"] = (out["against_all_wins"] >= 0.5).astype(float)
    out["half_losses"] = 1 - out["half_wins"]
    out["against_all_luck"] = out["actual_wins"] - out["against_all_wins"]
    out["half_luck"] = out["actual_wins"] - out["half_wins"]
    # Earned wins are the average of the two schedule-independent measures.
    out["earned_wins"] = ((out["against_all_wins"] + out["half_wins"]) / 2).round(2)
    out["luck_wins"] = out["actual_wins"] - out["earned_wins"]
    return out


def calculate_luck(df: pd.DataFrame, as_of_week: Optional[int] = None, include_current: bool = False) -> pd.DataFrame:
    """Compute per-team, per-week luck metrics from matchup results.

    Reads the columns ``week``, ``matchup_status``, ``win_probability``,
    ``team_name`` and ``team_points`` from ``df``. The input frame is not
    modified.

    Args:
        df: One row per team per week.
        as_of_week: When truthy, keep every row with ``week <= as_of_week``
            regardless of matchup status. When ``None`` (or ``0``), rows are
            selected by ``matchup_status`` instead.
        include_current: Only used when ``as_of_week`` is not given. If true,
            "midevent" rows are kept in addition to "postevent" rows.

    Returns:
        The selected rows with these columns added: ``actual_wins``
        (``win_probability > 0.5``), ``against_all_wins``,
        ``against_all_losses``, ``half_wins``, ``half_losses``,
        ``against_all_luck``, ``half_luck``, ``earned_wins`` and
        ``luck_wins``. Weeks whose row count differs from the number of
        distinct teams in ``df`` are dropped. If no week qualifies, an empty
        frame with the same columns is returned.
    """
    if as_of_week:
        selected = df.week <= as_of_week
    else:
        status_list = ["postevent"]
        if include_current:
            status_list.append("midevent")
        selected = df.matchup_status.isin(status_list)
    # Copy so the column assignment below never writes into a view of `df`.
    df_complete = df[selected].copy()
    df_complete["actual_wins"] = df_complete["win_probability"] > 0.5

    # The team count comes from the full input, not the filtered rows.
    n_teams = df.team_name.nunique()
    weekly_frames = [
        _add_luck_columns(df_week, n_teams)
        for _, df_week in df_complete.groupby("week")
        # Skip weeks that do not have exactly one row per team.
        if len(df_week) == n_teams
    ]
    if not weekly_frames:
        # pd.concat raises on an empty list; return an empty frame that has
        # the same columns as a normal result.
        return _add_luck_columns(df_complete.iloc[0:0], n_teams)
    return pd.concat(weekly_frames)
def get_grouped_luck(df_luck_all_weeks: pd.DataFrame) -> pd.DataFrame:
    """Total the weekly luck metrics per team.

    Selects ``team_name`` plus the points and win columns, sums them per
    team, and returns the result indexed by ``team_name`` and ordered by
    ``luck_wins`` from highest to lowest.
    """
    metric_cols = [
        "team_points",
        "against_all_wins",
        "half_wins",
        "actual_wins",
        "earned_wins",
        "luck_wins",
    ]
    # Restrict to the summed columns first so unrelated columns are dropped.
    selected = df_luck_all_weeks[["team_name", *metric_cols]]
    totals = selected.groupby("team_name").sum()
    return totals.sort_values("luck_wins", ascending=False)
def summarize_remaining_wins_from_matches_map(matches_map):
    """
    Collapse each team's per-outcome entries into per-win-count entries.

    ``matches_map`` maps team name -> {outcome string of "0"/"1" digits ->
    [observations, playoff prob, bye prob]}. The number of remaining wins
    is the digit sum of the outcome string. Outcomes sharing a win count
    are merged into an observation-weighted average, rounded to 3 places.

    Returns a map of team name -> {remaining wins ->
    [observations, playoff prob, bye prob]}.
    """
    summary = {}
    for team, outcomes in matches_map.items():
        by_wins = {}
        for outcome_bits, stats in outcomes.items():
            wins = sum(int(bit) for bit in outcome_bits)

            # First outcome seen for this win count is stored as given.
            if wins not in by_wins:
                by_wins[wins] = stats
                continue

            add_obs, add_playoff, add_bye = stats
            # An outcome with no observations carries no weight.
            if add_obs == 0:
                continue

            seen_obs, seen_playoff, seen_bye = by_wins[wins]
            total_obs = seen_obs + add_obs
            merged_playoff = round((seen_obs * seen_playoff + add_obs * add_playoff) / total_obs, 3)
            merged_bye = round((seen_obs * seen_bye + add_obs * add_bye) / total_obs, 3)
            by_wins[wins] = [total_obs, merged_playoff, merged_bye]
        summary[team] = by_wins
    return summary