James McCool
Add simulation of statistical projections in app.py. Introduced a new function to simulate player statistics using a normal distribution, generating percentiles for kills, deaths, assists, and CS projections. This enhancement allows for a more comprehensive analysis of player performance by incorporating simulated data, improving the overall depth of statistical insights available in the application.
6244ceb
raw
history blame
19.8 kB
import streamlit as st
st.set_page_config(layout="wide")
import numpy as np
import pandas as pd
import pymongo
import time
from datetime import datetime, timedelta
from scipy import stats
@st.cache_resource
def _get_database():
    """Create the MongoDB client once per process and return the database handle.

    Only the connection is cached as a resource; anything that depends on
    the current date or on collection contents is computed elsewhere so it
    does not go stale.
    """
    uri = st.secrets['mongo_uri']
    # NOTE(review): serverSelectionTimeoutMS=500000 is 500 seconds -- kept
    # as in the original; confirm this long timeout is intended.
    client = pymongo.MongoClient(uri, retryWrites=True, serverSelectionTimeoutMS=500000)
    return client["League_of_Legends_Database"]


@st.cache_data(ttl=3600)
def _load_distinct_names():
    """Return ``(team_names, player_names)`` from the ``gamelogs`` collection.

    Cached with a one-hour TTL so newly added teams/players eventually show
    up without re-running the ``distinct`` queries on every script rerun.
    """
    collection = _get_database()["gamelogs"]
    return collection.distinct("teamname"), collection.distinct("playername")


def init_conn():
    """Return the database handle, name lists and the selectable date window.

    Returns:
        tuple: ``(db, team_names, player_names, min_date, max_date)`` where
        ``max_date`` is yesterday and ``min_date`` is 365 days ago, both as
        ``datetime`` objects at midnight.

    The dates are recomputed on every call. Previously the whole function
    was wrapped in ``st.cache_resource``, which froze ``datetime.now()`` at
    the first call, so the date window never advanced while the process
    stayed alive. Normalising to midnight keeps the values stable within a
    day, so they remain good cache keys for downstream cached functions.
    """
    db = _get_database()
    team_names, player_names = _load_distinct_names()
    today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    max_date = today - timedelta(days=1)
    min_date = today - timedelta(days=365)
    return db, team_names, player_names, min_date, max_date
# Shared handles and option lists used by every widget and query below.
db, team_names, player_names, min_date, max_date = init_conn()

# Percentage display format for the win/loss share columns
# (wKill%, wDeath%, wAssist%, lKill%, lDeath%, lAssist%).
display_formats = {
    f'{outcome}{stat}%': '{:.2%}'
    for outcome in ('w', 'l')
    for stat in ('Kill', 'Death', 'Assist')
}

# Both team pickers preselect "T1" when it exists, otherwise the first entry.
# NOTE(review): the opponent picker therefore defaults to the same team as
# the team picker -- confirm that is intended.
if "T1" in team_names:
    default_team_index = team_names.index("T1")
else:
    default_team_index = 0

# Sidebar: every user-controlled option lives here.
with st.sidebar:
    st.header("Team Analysis Options")

    # --- Date range ---------------------------------------------------
    st.subheader("Date Range")
    date_filter = st.radio(
        "Select Date Range",
        ["Last Year", "Custom Range"]
    )
    if date_filter != "Last Year":
        # Custom range: two side-by-side pickers bounded by the data window.
        earliest_day = min_date.date()
        latest_day = max_date.date()
        start_col, end_col = st.columns(2)
        with start_col:
            start_date = st.date_input(
                "Start Date",
                value=latest_day - timedelta(days=30),
                min_value=earliest_day,
                max_value=latest_day
            )
        with end_col:
            end_date = st.date_input(
                "End Date",
                value=latest_day,
                min_value=earliest_day,
                max_value=latest_day
            )
    else:
        # Fixed window: the 365 days ending at max_date.
        end_date = max_date
        start_date = (end_date - timedelta(days=365))

    # --- Matchup ------------------------------------------------------
    selected_team = st.selectbox(
        "Select Team",
        options=team_names,
        index=default_team_index
    )
    selected_opponent = st.selectbox(
        "Select Opponent",
        options=team_names,
        index=default_team_index
    )

    # --- Prediction settings -------------------------------------------
    st.subheader("Prediction Settings")
    win_loss = st.selectbox(
        "Select Win/Loss",
        options=["Win", "Loss"],
        index=0
    )
    game_settings = st.selectbox(
        "Predict kills/deaths or use average?",
        options=["Average", "Predict"],
        index=0
    )
    # Zero means "no prediction supplied"; the inputs only appear (and
    # overwrite the zeros) when the user leaves "Average" mode.
    kill_prediction = 0
    death_prediction = 0
    if game_settings != "Average":
        kill_prediction = st.number_input(
            "Predicted Team Kills",
            min_value=1,
            max_value=100,
            value=20
        )
        death_prediction = st.number_input(
            "Predicted Team Deaths",
            min_value=1,
            max_value=100,
            value=5
        )
@st.cache_data(ttl = 60)
def simulate_stats(row, num_sims=1000, cv=0.3, percentiles=(10, 25, 50, 75, 90)):
    """Simulate each projected stat with a normal distribution and summarise it.

    For each of ``Kill_Proj``, ``Death_Proj``, ``Assist_Proj`` and
    ``CS_Proj`` in ``row``, ``num_sims`` values are drawn from
    ``Normal(mean, |mean| * cv)``, negative draws are clipped to zero, and
    the requested percentiles of the clipped sample are computed.

    Args:
        row: Mapping/Series holding the four ``*_Proj`` values.
        num_sims: Number of random draws per stat.
        cv: Coefficient of variation used to derive the standard deviation
            from the mean. Must be non-negative.
        percentiles: Percentile cut points (0-100) to report.

    Returns:
        pandas.Series indexed by stat name; each element is a NumPy array
        with one value per entry in ``percentiles``. A stat whose mean is
        NaN/inf yields an array of NaN instead of raising.

    Raises:
        ValueError: If ``cv`` is negative.
    """
    if cv < 0:
        raise ValueError("cv must be non-negative")

    cut_points = list(percentiles)
    results = {}
    for stat in ('Kill_Proj', 'Death_Proj', 'Assist_Proj', 'CS_Proj'):
        mean = float(row[stat])
        if not np.isfinite(mean):
            # scipy rejects a NaN/inf loc or scale; report "unknown" instead.
            results[stat] = np.full(len(cut_points), np.nan)
            continue
        # abs() keeps the scale valid even if a projection is negative;
        # scipy raises on a negative scale.
        spread = abs(mean) * cv
        sims = stats.norm.rvs(loc=mean, scale=spread, size=num_sims)
        # Counting stats cannot be negative.
        sims = np.maximum(sims, 0)
        results[stat] = np.percentile(sims, cut_points)
    return pd.Series(results)
# Raw per-game columns that get averaged and projected.
_CALC_COLUMNS = ('kills', 'deaths', 'assists', 'total_cs')
# (column-name suffix, value of the ``result`` column) for each game outcome.
_OUTCOMES = (('win', 1), ('loss', 0))


def _add_player_shares(team_rows):
    """Attach per-player and per-team outcome averages and the player's share of them.

    For each outcome and stat this adds ``playername_avg_{stat}_{outcome}``
    and ``teamname_avg_{stat}_{outcome}`` (team averages come from rows whose
    ``position`` is ``'team'``; 0 when no matching group exists), then the
    ``playername_avg_{kill,death,assist,cs}_share_{outcome}`` ratios.
    Assist share is deliberately relative to team *kills*, as in the
    original code. Works on a copy, so the input frame is not modified.
    """
    team_rows = team_rows.copy()
    for label, flag in _OUTCOMES:
        outcome_rows = team_rows[team_rows['result'] == flag]
        team_level_rows = outcome_rows[outcome_rows['position'] == 'team']
        for stat in _CALC_COLUMNS:
            player_avg = outcome_rows.groupby(['playername'])[stat].mean().to_dict()
            team_avg = team_level_rows.groupby(['teamname'])[stat].mean().to_dict()
            # Element-wise dict lookups replace the row-wise DataFrame.apply.
            team_rows[f'playername_avg_{stat}_{label}'] = [
                player_avg.get(name, 0) for name in team_rows['playername']
            ]
            team_rows[f'teamname_avg_{stat}_{label}'] = [
                team_avg.get(name, 0) for name in team_rows['teamname']
            ]

        team_kills = team_rows[f'teamname_avg_kills_{label}']
        team_rows[f'playername_avg_kill_share_{label}'] = (
            team_rows[f'playername_avg_kills_{label}'] / team_kills
        )
        team_rows[f'playername_avg_death_share_{label}'] = (
            team_rows[f'playername_avg_deaths_{label}'] / team_rows[f'teamname_avg_deaths_{label}']
        )
        team_rows[f'playername_avg_assist_share_{label}'] = (
            team_rows[f'playername_avg_assists_{label}'] / team_kills
        )
        team_rows[f'playername_avg_cs_share_{label}'] = (
            team_rows[f'playername_avg_total_cs_{label}'] / team_rows[f'teamname_avg_total_cs_{label}']
        )
    return team_rows


def _opponent_position_boosts(league_rows, opponent):
    """Compute per-position boost factors for games played against ``opponent``.

    A boost is the average a position records against ``opponent`` divided
    by the average that position records in the row's league, separately
    for wins and losses and for each stat in ``_CALC_COLUMNS``.

    Returns:
        tuple: ``(boost_maps, opp_boosts)``. ``boost_maps`` maps
        ``(outcome, stat)`` to a ``{position: boost}`` dict (when a position
        appears on several rows the last row wins, as before).
        ``opp_boosts`` is the same data as a DataFrame indexed by position.
    """
    # Averages are taken over the full table, but only the opponent's rows
    # are kept afterwards, so lookups are done on that subset only.
    opp_rows = league_rows[league_rows['Opponent'] == opponent].copy()
    for label, flag in _OUTCOMES:
        outcome_rows = league_rows[league_rows['result'] == flag]
        for stat in _CALC_COLUMNS:
            league_avg = outcome_rows.groupby(['league', 'position'])[stat].mean().to_dict()
            allowed_avg = outcome_rows.groupby(['Opponent', 'position'])[stat].mean().to_dict()
            league_col = f'league_pos_avg_{stat}_{label}'
            allowed_col = f'Opponent_pos_avg_{stat}_allowed_{label}'
            opp_rows[league_col] = [
                league_avg.get(key, 0)
                for key in zip(opp_rows['league'], opp_rows['position'])
            ]
            opp_rows[allowed_col] = [
                allowed_avg.get(key, 0)
                for key in zip(opp_rows['Opponent'], opp_rows['position'])
            ]
            opp_rows[f'overall_{label}_{stat}_boost_pos'] = opp_rows[allowed_col] / opp_rows[league_col]

    boost_maps = {
        (label, stat): dict(zip(opp_rows['position'], opp_rows[f'overall_{label}_{stat}_boost_pos']))
        for label, _ in _OUTCOMES
        for stat in _CALC_COLUMNS
    }
    opp_boosts = pd.DataFrame({
        f"opp_pos_{'cs' if stat == 'total_cs' else stat}_boost_{label}": boost_maps[(label, stat)]
        for label, _ in _OUTCOMES
        for stat in _CALC_COLUMNS
    }).set_index(pd.Index(list(boost_maps[('win', 'kills')].keys()), name='position'))
    return boost_maps, opp_boosts


@st.cache_data(ttl=60)
def init_team_data(team, opponent, win_loss, kill_prediction, death_prediction, start_date, end_date):
    """Build per-player stat projections for ``team`` against ``opponent``.

    Args:
        team: Value matched against the ``teamname`` field.
        opponent: Value matched against the ``Opponent`` column.
        win_loss: ``"Win"`` selects the win averages/boosts; any other value
            selects the loss ones.
        kill_prediction: Predicted team kills. ``0`` (or less) means "use
            historical per-player averages"; a positive value switches to
            share-based projections scaled by the predictions.
        death_prediction: Predicted team deaths, used only in prediction mode.
        start_date, end_date: ``date``/``datetime`` bounds; expanded to the
            start and end of those days and compared as formatted strings.
            NOTE(review): this assumes the ``date`` field is stored as a
            ``"%Y-%m-%d %H:%M:%S"`` string -- confirm against the collection.

    Returns:
        tuple: ``(team_data, opp_boosts)``. ``team_data`` is indexed by
        player name with ``teamname``, ``position``, ``Kill_Proj``,
        ``Death_Proj``, ``Assist_Proj`` and ``CS_Proj`` (rows containing NaN
        are dropped). ``opp_boosts`` is the per-position boost table.

    In prediction mode the kill and assist shares are multiplied by
    ``kill_prediction`` and the death share by ``death_prediction``.
    Previously the predictions were accepted but never applied, so the
    "projections" were bare shares.
    """
    # Convert date objects to datetime strings in the stored format.
    start_datetime = datetime.combine(start_date, datetime.min.time()).strftime("%Y-%m-%d %H:%M:%S")
    end_datetime = datetime.combine(end_date, datetime.max.time()).strftime("%Y-%m-%d %H:%M:%S")
    date_window = {"$gte": start_datetime, "$lte": end_datetime}

    collection = db["gamelogs"]
    raw_display = pd.DataFrame(list(collection.find({"teamname": team, "date": date_window})))
    raw_opponent = pd.DataFrame(list(collection.find({"date": date_window})))

    player_tables = _add_player_shares(raw_display)
    boost_maps, opp_boosts = _opponent_position_boosts(raw_opponent, opponent)

    outcome = 'win' if win_loss == "Win" else 'loss'
    if kill_prediction > 0:
        # (output column, source column, boost stat, scale factor)
        plan = [
            ('Kill_Proj', f'playername_avg_kill_share_{outcome}', 'kills', kill_prediction),
            ('Death_Proj', f'playername_avg_death_share_{outcome}', 'deaths', death_prediction),
            # Assist share is relative to team kills, so it scales with kills.
            ('Assist_Proj', f'playername_avg_assist_share_{outcome}', 'assists', kill_prediction),
            ('CS_Proj', f'playername_avg_total_cs_{outcome}', 'total_cs', 1),
        ]
    else:
        plan = [
            ('Kill_Proj', f'playername_avg_kills_{outcome}', 'kills', 1),
            ('Death_Proj', f'playername_avg_deaths_{outcome}', 'deaths', 1),
            ('Assist_Proj', f'playername_avg_assists_{outcome}', 'assists', 1),
            ('CS_Proj', f'playername_avg_total_cs_{outcome}', 'total_cs', 1),
        ]

    # One row per player; copy so the new columns are not written to a slice.
    team_data = player_tables.drop_duplicates(subset=['playername']).copy()
    for proj_col, source_col, stat, scale in plan:
        position_boost = boost_maps[(outcome, stat)]
        # Positions the opponent table does not cover get a neutral boost of 1.
        boost = pd.Series(
            [position_boost.get(position, 1) for position in team_data['position']],
            index=team_data.index,
            dtype=float,
        )
        team_data[proj_col] = team_data[source_col] * boost * scale

    team_data = team_data[['playername', 'teamname', 'position', 'Kill_Proj', 'Death_Proj', 'Assist_Proj', 'CS_Proj']]
    return team_data.dropna().set_index('playername'), opp_boosts
# Everything below only runs on the rerun triggered by pressing "Run".
if st.button("Run"):
    team_data, opp_boost = init_team_data(selected_team, selected_opponent, win_loss, kill_prediction, death_prediction, start_date, end_date)

    # Projection column -> label shown in the "Stat" column, in output order.
    stat_labels = (
        ('Kill_Proj', 'Kills'),
        ('Death_Proj', 'Deaths'),
        ('Assist_Proj', 'Assists'),
        ('CS_Proj', 'CS'),
    )
    # Output keys, positionally matched to the array simulate_stats returns
    # for each stat with its default settings.
    percentile_labels = ('P10', 'P25', 'P50', 'P75', 'P90')

    # One record per (player, stat) holding the simulated percentiles.
    sim_results = []
    for player, projection in team_data.iterrows():
        simulated = simulate_stats(projection)
        for column, label in stat_labels:
            record = {
                'Player': player,
                'Position': projection['position'],
                'Stat': label,
            }
            for slot, percentile_name in enumerate(percentile_labels):
                record[percentile_name] = simulated[column][slot]
            sim_results.append(record)
    sim_df = pd.DataFrame(sim_results)

    team_tab, opponent_tab = st.tabs(["Team Data", "Opponent Data"])
    with team_tab:
        styled_team = team_data.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(display_formats, precision=2)
        st.dataframe(styled_team, use_container_width = True)
    with opponent_tab:
        styled_opponent = opp_boost.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(precision=2)
        st.dataframe(styled_opponent, use_container_width = True)

    # NOTE(review): the original nesting of this call could not be recovered
    # from the flattened source; it is rendered below the tabs -- confirm.
    st.dataframe(sim_df.style.format(precision=2), use_container_width=True)