Spaces:
Running
Running
James McCool
Refactor player data handling in init_team_data function of app.py. Updated results_dict assignment to drop NaN values, ensuring cleaner data output for each game iteration. Adjusted playername indexing to maintain clarity in player statistics during simulations. This change enhances the overall quality and usability of the player summary data.
026f31b
import streamlit as st | |
st.set_page_config(layout="wide") | |
import numpy as np | |
import pandas as pd | |
import pymongo | |
import time | |
from datetime import datetime, timedelta | |
from scipy import stats | |
def init_conn():
    """Connect to MongoDB and load the values the UI needs up front.

    Streamlit re-executes the whole script on every widget interaction, so
    the ``MongoClient`` is created through ``st.cache_resource`` and reused
    across reruns instead of opening a new connection pool each time. The
    distinct-name queries and the date bounds are still evaluated on every
    call so they always reflect the current data and the current day.

    Returns:
        A 5-tuple ``(db, team_names, player_names, min_date, max_date)``:
        the ``League_of_Legends_Database`` handle, the distinct ``teamname``
        and ``playername`` values of the ``gamelogs`` collection, and the
        datetimes 365 days ago and 1 day ago relative to now.
    """
    # Decorated inside the function so that ``st`` is only touched at call
    # time; Streamlit keys the cache on the function's module, qualified
    # name and source, so re-defining it on each rerun still hits the cache.
    @st.cache_resource
    def _get_mongo_client(uri):
        # NOTE(review): 500000 ms is ~8 minutes of server-selection wait;
        # confirm this is intended and not a typo for a shorter timeout.
        return pymongo.MongoClient(uri, retryWrites=True, serverSelectionTimeoutMS=500000)

    client = _get_mongo_client(st.secrets['mongo_uri'])
    db = client["League_of_Legends_Database"]
    current_date = datetime.now()
    collection = db["gamelogs"]
    max_date = current_date - timedelta(days=1)
    min_date = current_date - timedelta(days=365)
    team_names = collection.distinct("teamname")
    player_names = collection.distinct("playername")
    return db, team_names, player_names, min_date, max_date
# Open the database handle and load the selectable names / date bounds.
(db, team_names, player_names, min_date, max_date) = init_conn()

# Styler format strings: win ('w') and loss ('l') share columns are shown
# as percentages with two decimals.
display_formats = {
    f'{outcome}{stat}%': '{:.2%}'
    for outcome in ('w', 'l')
    for stat in ('Kill', 'Death', 'Assist')
}
# Sidebar: collects every input that drives a projection run.
with st.sidebar:
    st.header("Team Analysis Options")

    # Date window: either the trailing year ending at max_date, or a
    # user-picked range bounded by min_date / max_date.
    st.subheader("Date Range")
    date_filter = st.radio("Select Date Range", ["Last Year", "Custom Range"])
    if date_filter != "Last Year":
        earliest_day = min_date.date()
        latest_day = max_date.date()
        start_col, end_col = st.columns(2)
        start_date = start_col.date_input(
            "Start Date",
            value=latest_day - timedelta(days=30),
            min_value=earliest_day,
            max_value=latest_day,
        )
        end_date = end_col.date_input(
            "End Date",
            value=latest_day,
            min_value=earliest_day,
            max_value=latest_day,
        )
    else:
        end_date = max_date
        start_date = end_date - timedelta(days=365)

    # Team and opponent pickers; both default to "T1" when it is available.
    default_team_index = team_names.index("T1") if "T1" in team_names else 0
    team_col, opponent_col = st.columns(2)
    selected_team = team_col.selectbox(
        "Select Team",
        options=team_names,
        index=default_team_index,
    )
    selected_opponent = opponent_col.selectbox(
        "Select Opponent",
        options=team_names,
        index=default_team_index,
    )

    st.subheader("Prediction Settings")
    num_games = st.selectbox(
        "Is the match BO1, BO3, or BO5?",
        options=["BO1", "BO3", "BO5"],
        index=0,
    )
    # The digit after the "BO" prefix is the number of games to configure.
    game_count = int(num_games[len("BO"):])

    # One entry per game, filled in tab order.
    win_loss_settings, game_settings_list = [], []
    kill_predictions, death_predictions = [], []

    game_tabs = st.tabs([f"Game {number}" for number in range(1, game_count + 1)])
    for game_num, game_tab in enumerate(game_tabs, start=1):
        with game_tab:
            win_loss_settings.append(
                st.selectbox(
                    f"Game {game_num} Win/Loss",
                    options=["Win", "Loss"],
                    index=0,
                    key=f"win_loss_{game_num}",
                )
            )
            game_setting = st.selectbox(
                f"Game {game_num} Prediction Type",
                options=["Average", "Predict"],
                index=0,
                key=f"game_settings_{game_num}",
            )

            # "Average" games record 0 kills/deaths; only "Predict" games
            # ask the user for team totals.
            predicted_kills = predicted_deaths = 0
            if game_setting != "Average":
                kills_col, deaths_col = st.columns(2)
                predicted_kills = kills_col.number_input(
                    f"Game {game_num} Predicted Team Kills",
                    min_value=1,
                    max_value=100,
                    value=20,
                    key=f"kills_{game_num}",
                )
                predicted_deaths = deaths_col.number_input(
                    f"Game {game_num} Predicted Team Deaths",
                    min_value=1,
                    max_value=100,
                    value=5,
                    key=f"deaths_{game_num}",
                )
            kill_predictions.append(predicted_kills)
            death_predictions.append(predicted_deaths)
def simulate_stats(row, num_sims=1000):
    """Return simulated percentile bands for each projected stat in ``row``.

    For every one of ``Kill_Proj``, ``Death_Proj``, ``Assist_Proj`` and
    ``CS_Proj`` the projection is used as the mean of a normal distribution
    whose standard deviation is 30% of that mean. ``num_sims`` values are
    drawn, negative draws are raised to 0, and the 10th/25th/50th/75th/90th
    percentiles of the draws are reported.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) holding the four
            ``*_Proj`` values.
        num_sims: Number of random draws per stat.

    Returns:
        A ``pd.Series`` keyed by stat name; each value is the array of the
        five percentiles for that stat.
    """
    spread_ratio = 0.3  # coefficient of variation: std = mean * 0.3
    cut_points = [10, 25, 50, 75, 90]

    def _percentile_bands(center):
        draws = stats.norm.rvs(loc=center, scale=center * spread_ratio, size=num_sims)
        # A stat can never be negative, so floor the simulated values at 0.
        return np.percentile(np.clip(draws, 0, None), cut_points)

    return pd.Series({
        name: _percentile_bands(row[name])
        for name in ('Kill_Proj', 'Death_Proj', 'Assist_Proj', 'CS_Proj')
    })
# Raw gamelog columns that are averaged to build projections.
_CALC_COLUMNS = ['kills', 'deaths', 'assists', 'total_cs']
# Column layout shared by every per-game projection table.
_PROJECTION_COLUMNS = ['playername', 'teamname', 'position', 'Kill_Proj', 'Death_Proj', 'Assist_Proj', 'CS_Proj']


def _lookup(keys, table, default=0):
    """Map every value of the Series ``keys`` through ``table``, using ``default`` for misses."""
    return keys.map(lambda key: table.get(key, default))


def _add_player_shares(team_logs):
    """Add per-player / per-team win and loss averages and share columns to ``team_logs`` in place."""
    for outcome, flag in (('win', 1), ('loss', 0)):
        rows = team_logs[team_logs['result'] == flag]
        by_player = rows.groupby('playername')
        # Team totals come from the rows whose position is 'team'.
        by_team = rows[rows['position'] == 'team'].groupby('teamname')
        for stat in _CALC_COLUMNS:
            team_logs[f'playername_avg_{stat}_{outcome}'] = _lookup(
                team_logs['playername'], by_player[stat].mean().to_dict())
            team_logs[f'teamname_avg_{stat}_{outcome}'] = _lookup(
                team_logs['teamname'], by_team[stat].mean().to_dict())

        team_kills = team_logs[f'teamname_avg_kills_{outcome}']
        team_logs[f'playername_avg_kill_share_{outcome}'] = team_logs[f'playername_avg_kills_{outcome}'] / team_kills
        team_logs[f'playername_avg_death_share_{outcome}'] = (
            team_logs[f'playername_avg_deaths_{outcome}'] / team_logs[f'teamname_avg_deaths_{outcome}'])
        # Assists are expressed relative to team kills because the assist
        # projection is later scaled by the predicted team kills.
        team_logs[f'playername_avg_assist_share_{outcome}'] = team_logs[f'playername_avg_assists_{outcome}'] / team_kills
        team_logs[f'playername_avg_cs_share_{outcome}'] = (
            team_logs[f'playername_avg_total_cs_{outcome}'] / team_logs[f'teamname_avg_total_cs_{outcome}'])
    return team_logs


def _opponent_boosts(league_logs, opponent):
    """Return ``{column name: {position: ratio}}`` of stats allowed by ``opponent`` vs the league average."""
    # Only rows played against the opponent end up in the boost tables, so
    # the lookups are done for those rows rather than for the whole league.
    versus = league_logs[league_logs['Opponent'] == opponent]
    positions = versus['position']
    league_keys = list(zip(versus['league'], positions))

    boosts = {}
    for outcome, flag in (('win', 1), ('loss', 0)):
        by_league_position = league_logs[league_logs['result'] == flag].groupby(['league', 'position'])
        by_position_vs_opponent = versus[versus['result'] == flag].groupby('position')
        for stat in _CALC_COLUMNS:
            league_avg = by_league_position[stat].mean().to_dict()  # keyed by (league, position)
            allowed = _lookup(positions, by_position_vs_opponent[stat].mean().to_dict())
            baseline = pd.Series(
                [league_avg.get(key, 0) for key in league_keys],
                index=versus.index,
                dtype=float,
            )
            label = 'cs' if stat == 'total_cs' else stat
            # When a position occurs in several rows the last row wins.
            boosts[f'opp_pos_{label}_boost_{outcome}'] = dict(zip(positions, allowed / baseline))
    return boosts


def _project_game(roster, boosts, won, team_kills, team_deaths):
    """Build one game's projection table for every player in ``roster``."""
    outcome = 'win' if won else 'loss'
    if team_kills > 0:
        # Predicted team totals: distribute them by each player's share.
        plan = [
            ('Kill_Proj', 'kill_share', 'kills', team_kills),
            ('Death_Proj', 'death_share', 'deaths', team_deaths),
            ('Assist_Proj', 'assist_share', 'assists', team_kills),
        ]
    else:
        # No prediction supplied: fall back to the player's own averages.
        plan = [
            ('Kill_Proj', 'kills', 'kills', None),
            ('Death_Proj', 'deaths', 'deaths', None),
            ('Assist_Proj', 'assists', 'assists', None),
        ]
    plan.append(('CS_Proj', 'total_cs', 'cs', None))

    # Explicit copy so the new columns are not written onto a slice.
    game_table = roster[['playername', 'teamname', 'position']].copy()
    for target, source, label, scale in plan:
        # Positions without an opponent boost are left unadjusted (x1).
        boost = _lookup(roster['position'], boosts[f'opp_pos_{label}_boost_{outcome}'], default=1)
        projection = roster[f'playername_avg_{source}_{outcome}'] * boost
        game_table[target] = projection if scale is None else projection * scale
    return game_table


def init_team_data(team, opponent, win_loss_settings, kill_predictions, death_predictions, start_date, end_date):
    """Project per-player kills, deaths, assists and CS for each game of a match.

    Game logs are read from the ``gamelogs`` collection of the module-level
    ``db`` for the given date window: once for ``team`` only, and once for
    all teams (used for league averages and for what ``opponent`` allows by
    position).

    Args:
        team: Team whose players are projected.
        opponent: Team used to compute positional boost factors.
        win_loss_settings: Per game, ``"Win"`` or anything else (treated as a loss).
        kill_predictions: Per game, predicted team kills; a value of 0 or
            less means "use the players' historical averages".
        death_predictions: Per game, predicted team deaths (only used when
            the matching kill prediction is positive).
        start_date: First day of the window (``date`` or ``datetime``).
        end_date: Last day of the window (``date`` or ``datetime``).

    Returns:
        A 3-tuple ``(overall, opp_boosts, results_dict)``:
        ``overall`` stacks every game's projections, indexed by
        ``"<playername> game <n>"`` with incomplete rows dropped;
        ``opp_boosts`` is a position-indexed frame of the boost factors;
        ``results_dict`` maps ``"game <n>"`` to that game's projection table
        (original player names, incomplete rows dropped).

    Raises:
        ValueError: If no game logs exist for ``team`` in the date window.
    """
    # The query compares against "YYYY-MM-DD HH:MM:SS" strings.
    start_datetime = datetime.combine(start_date, datetime.min.time()).strftime("%Y-%m-%d %H:%M:%S")
    end_datetime = datetime.combine(end_date, datetime.max.time()).strftime("%Y-%m-%d %H:%M:%S")
    date_window = {"$gte": start_datetime, "$lte": end_datetime}

    collection = db["gamelogs"]
    team_logs = pd.DataFrame(list(collection.find({"teamname": team, "date": date_window})))
    league_logs = pd.DataFrame(list(collection.find({"date": date_window})))
    if team_logs.empty:
        # Without this check the first column access fails with an opaque KeyError.
        raise ValueError(f"No game logs found for team {team!r} between {start_datetime} and {end_datetime}")

    player_tables = _add_player_shares(team_logs)
    boosts = _opponent_boosts(league_logs, opponent)
    opp_boosts = pd.DataFrame(boosts).rename_axis('position')

    # Every row of a player carries the same averages, so one row per player is enough.
    roster = player_tables.drop_duplicates(subset=['playername'])

    results_dict = {}
    game_frames = []
    game_inputs = zip(win_loss_settings, kill_predictions, death_predictions)
    for game_number, (setting, team_kills, team_deaths) in enumerate(game_inputs, start=1):
        game_table = _project_game(roster, boosts, setting == "Win", team_kills, team_deaths)
        # Store the clean per-game table before the names get a game suffix.
        results_dict[f'game {game_number}'] = game_table.dropna()
        game_table['playername'] = game_table['playername'] + f' game {game_number}'
        game_frames.append(game_table)

    # Concatenate once: appending onto an empty object-dtype frame is
    # deprecated in pandas and can leave the numeric columns as object dtype.
    if game_frames:
        overall_team_data = pd.concat(game_frames)
    else:
        overall_team_data = pd.DataFrame(columns=_PROJECTION_COLUMNS)
    return overall_team_data.dropna().set_index('playername'), opp_boosts, results_dict
# Run handler: build the projections, aggregate them across the series,
# simulate percentile bands and render everything.
if st.button("Run"):
    team_data, opp_boost, results_dict = init_team_data(selected_team, selected_opponent, win_loss_settings, kill_predictions, death_predictions, start_date, end_date)

    stat_labels = [('Kill_Proj', 'Kills'), ('Death_Proj', 'Deaths'), ('Assist_Proj', 'Assists'), ('CS_Proj', 'CS')]
    band_labels = ['10%', '25%', '50%', '75%', '90%']

    # Series totals per player. The per-game tables are stacked and summed
    # by player name; the previous update()-then-add approach overwrote the
    # running totals, so only the last game (doubled) survived.
    # A player missing from a game's table contributes nothing for that game.
    per_game_tables = []
    for game_df in results_dict.values():
        clean_df = game_df.copy()
        # Remove 'game X' from playernames if present
        clean_df['playername'] = clean_df['playername'].str.split(' game ').str[0]
        per_game_tables.append(clean_df)
    player_summary = (
        pd.concat(per_game_tables, ignore_index=True)
        .groupby('playername', sort=False)
        .agg({'teamname': 'first', 'position': 'first', **{column: 'sum' for column, _ in stat_labels}})
    )

    # Simulated percentile bands: one row per (player-game, stat).
    sim_results = []
    for idx, row in team_data.iterrows():
        percentiles = simulate_stats(row)
        for stat, name in stat_labels:
            sim_results.append({
                'Player': idx,
                'Position': row['position'],
                'Stat': name,
                **dict(zip(band_labels, percentiles[stat])),
            })
    # Naming the columns keeps 'Player' available even when there are no rows.
    sim_df = pd.DataFrame(sim_results, columns=['Player', 'Position', 'Stat', *band_labels])

    tab1, tab2 = st.tabs(["Team Data", "Opponent Data"])
    with tab1:
        st.dataframe(player_summary.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(display_formats, precision=2), use_container_width = True)
        st.dataframe(team_data.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(display_formats, precision=2), use_container_width = True)
    with tab2:
        st.dataframe(opp_boost.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(precision=2), use_container_width = True)

    unique_players = sim_df['Player'].unique().tolist()
    if unique_players:
        player_tabs = st.tabs(unique_players)
        for player, tab in zip(unique_players, player_tabs):
            with tab:
                player_data = sim_df[sim_df['Player'] == player].set_index('Stat')
                st.dataframe(
                    player_data[band_labels].style.format(precision=2),
                    use_container_width=True
                )
    else:
        # st.tabs() rejects an empty label list, so report the situation instead.
        st.warning("No player projections could be computed for the selected team and date range.")