# YFDashboard / src /analyze_yahoo.py
# Jon Solow
# Implement existing simulation in admin page
# af23901
# raw
# history blame
# 4.36 kB
import json
import pandas as pd
from stqdm import stqdm
from typing import Optional
from simulate import (
calculate_scenario_probabilities,
create_simulate_summary,
run_simulations,
)
from yahoo_client import YahooFantasyClient
def calculate_luck(df: pd.DataFrame, as_of_week: Optional[int] = None, include_current: bool = False) -> pd.DataFrame:
    """Compare each team's actual weekly result with what its score "deserved".

    Only weeks that contain exactly one row per team (as counted over the whole
    of ``df``) are scored; other weeks are skipped. For every scored row the
    following columns are added:

    * ``actual_wins`` -- ``win_probability > 0.5``.
    * ``against_all_wins`` / ``against_all_losses`` -- rank of ``team_points``
      within the week scaled to the 0..1 range (rounded to 2 places) and its
      complement.
    * ``half_wins`` / ``half_losses`` -- 1.0 when ``against_all_wins >= 0.5``
      and its complement.
    * ``earned_wins`` -- mean of ``against_all_wins`` and ``half_wins``
      (rounded to 2 places).
    * ``against_all_luck``, ``half_luck``, ``luck_wins`` -- ``actual_wins``
      minus ``against_all_wins``, ``half_wins`` and ``earned_wins``.

    Args:
        df: Frame with columns ``week``, ``team_name``, ``team_points`` and
            ``win_probability``; ``matchup_status`` is also read when
            ``as_of_week`` is not used.
        as_of_week: When truthy, keep rows whose ``week`` is at most this
            value. ``None`` (and ``0``) fall back to filtering on
            ``matchup_status``.
        include_current: With the status filter, also keep ``"midevent"``
            rows in addition to ``"postevent"`` ones.

    Returns:
        The scored rows of every complete week, concatenated in week order.
        When no week qualifies, an empty frame that still carries the luck
        columns is returned instead of raising.
    """
    luck_columns = [
        "against_all_wins",
        "against_all_losses",
        "half_wins",
        "half_losses",
        "against_all_luck",
        "half_luck",
        "earned_wins",
        "luck_wins",
    ]

    if as_of_week:
        selected_rows = df.week <= as_of_week
    else:
        status_list = ["postevent"]
        if include_current:
            status_list.append("midevent")
        selected_rows = df.matchup_status.isin(status_list)

    # Work on an explicit copy so adding columns is never a chained assignment
    # on a slice of the caller's frame.
    df_complete = df[selected_rows].copy()
    df_complete["actual_wins"] = df_complete["win_probability"] > 0.5

    df_list = []
    n_teams = df.team_name.nunique()
    for _, df_week in df_complete.groupby("week"):
        if len(df_week) != n_teams:
            # Incomplete week (missing or duplicated teams): ranks would be skewed.
            continue
        df_week = df_week.copy()
        # Lowest score maps to 0.0 and highest to 1.0; ties share the average rank.
        df_week["against_all_wins"] = ((df_week.team_points.rank().astype("float") - 1) / (n_teams - 1)).round(2)
        df_week["against_all_losses"] = 1 - df_week["against_all_wins"]
        df_week["half_wins"] = (df_week["against_all_wins"] >= 0.5) * 1.0
        df_week["half_losses"] = 1 - df_week["half_wins"]
        df_week["against_all_luck"] = df_week["actual_wins"] - df_week["against_all_wins"]
        df_week["half_luck"] = df_week["actual_wins"] - df_week["half_wins"]
        df_week["earned_wins"] = ((df_week["against_all_wins"] + df_week["half_wins"]) / 2).round(2)
        df_week["luck_wins"] = df_week["actual_wins"] - df_week["earned_wins"]
        df_list.append(df_week)

    if not df_list:
        # pd.concat([]) raises ValueError; hand back an empty frame with the
        # same columns a populated result would have.
        return df_complete.iloc[0:0].reindex(columns=[*df_complete.columns, *luck_columns])

    return pd.concat(df_list)
def get_grouped_luck(df_luck_all_weeks: pd.DataFrame) -> pd.DataFrame:
    """Sum the luck metrics per team and order the teams by total ``luck_wins``.

    Args:
        df_luck_all_weeks: Frame containing ``team_name`` plus the columns
            ``team_points``, ``against_all_wins``, ``half_wins``,
            ``actual_wins``, ``earned_wins`` and ``luck_wins``.

    Returns:
        One row per ``team_name`` (as the index) holding the column sums,
        sorted by ``luck_wins`` in descending order.
    """
    metric_columns = (
        "team_points",
        "against_all_wins",
        "half_wins",
        "actual_wins",
        "earned_wins",
        "luck_wins",
    )
    # Drop every column that is neither the grouping key nor a summed metric.
    relevant = df_luck_all_weeks[["team_name", *metric_columns]]
    team_totals = relevant.groupby("team_name").sum()
    return team_totals.sort_values("luck_wins", ascending=False)
def summarize_remaining_wins_from_matches_map(matches_map):
    """
    Collapse per-scenario results into per-win-count results for every team.

    ``matches_map`` maps team name -> {binary outcome string -> [observations,
    playoff probability, bye probability]}. The digits of each outcome string
    are summed to get a number of wins, and scenarios sharing that number are
    merged into one observation-weighted average (rounded to 3 places at each
    merge). Returns team name -> {number of wins -> [observations, playoff
    probability, bye probability]}.
    """
    summary_by_team = {}
    for team, scenario_stats in matches_map.items():
        stats_by_wins = {}
        for outcome_bits, stats in scenario_stats.items():
            wins = sum(map(int, outcome_bits))

            if wins not in stats_by_wins:
                # First scenario with this win total: keep its stats untouched.
                stats_by_wins[wins] = stats
                continue

            added_obs, added_playoff, added_bye = stats
            if added_obs == 0:
                # A scenario that was never observed carries no weight.
                continue

            seen_obs, seen_playoff, seen_bye = stats_by_wins[wins]
            total_obs = seen_obs + added_obs

            def weighted(seen_prob, added_prob):
                # Observation-weighted mean of the stored and incoming value.
                return round((seen_obs * seen_prob + added_obs * added_prob) / total_obs, 3)

            stats_by_wins[wins] = [
                total_obs,
                weighted(seen_playoff, added_playoff),
                weighted(seen_bye, added_bye),
            ]
        summary_by_team[team] = stats_by_wins
    return summary_by_team
def analyze_league(league_key: str, yahoo_client: YahooFantasyClient) -> None:
    """Simulate the rest of a league's season and write the results to disk.

    Fetches the full schedule and the league settings through ``yahoo_client``,
    runs 10000 simulations treating every week before the league's current
    week as complete, and writes three files to relative paths, each prefixed
    with the league name (stripped, spaces replaced by underscores,
    lower-cased):

    * ``<name>_sim_sum.csv`` -- output of ``create_simulate_summary``.
    * ``<name>_scenario_probs.json`` -- output of
      ``calculate_scenario_probabilities``.
    * ``<name>_remaining_wins_probs.json`` -- the scenario probabilities
      regrouped by ``summarize_remaining_wins_from_matches_map``.

    Args:
        league_key: League identifier passed to both client calls.
        yahoo_client: Client used to fetch the schedule and league settings.
    """
    schedule = yahoo_client.full_schedule_dataframe(league_key)
    settings = yahoo_client.parse_league_settings(league_key)

    name_str = settings.name.strip().replace(" ", "_").lower()
    sim_completed_weeks = settings.current_week - 1
    print(f"{sim_completed_weeks=}")

    # Register stqdm's pandas integration before the simulations run.
    stqdm.pandas()
    simulations = run_simulations(
        schedule,
        complete_weeks=sim_completed_weeks,
        n_sims=10000,
        n_playoff=settings.num_playoff_teams,
    )

    summary = create_simulate_summary(simulations)
    summary.to_csv(f"{name_str}_sim_sum.csv")

    scenario_probs = calculate_scenario_probabilities(simulations)
    with open(f"{name_str}_scenario_probs.json", "w") as scenario_file:
        json.dump(scenario_probs, scenario_file)

    wins_to_probs = summarize_remaining_wins_from_matches_map(scenario_probs)
    with open(f"{name_str}_remaining_wins_probs.json", "w") as wins_file:
        json.dump(wins_to_probs, wins_file)