James McCool
Refactor player data calculations in app.py for improved clarity and efficiency
5f9c332
import streamlit as st | |
st.set_page_config(layout="wide") | |
import numpy as np | |
import pandas as pd | |
import time | |
from fuzzywuzzy import process | |
from collections import Counter | |
## import global functions | |
from global_func.clean_player_name import clean_player_name | |
from global_func.load_contest_file import load_contest_file | |
from global_func.load_file import load_file | |
from global_func.load_ss_file import load_ss_file | |
from global_func.find_name_mismatches import find_name_mismatches | |
from global_func.predict_dupes import predict_dupes | |
from global_func.highlight_rows import highlight_changes, highlight_changes_winners, highlight_changes_losers | |
from global_func.load_csv import load_csv | |
from global_func.find_csv_mismatches import find_csv_mismatches | |
# Top-level tabs: file loading on the first, lineup analysis on the second.
tab1, tab2 = st.tabs(["Data Load", "Contest Analysis"])

with tab1:
    # Wipes everything cached in session state, including loaded files.
    if st.button('Clear data', key='reset1'):
        st.session_state.clear()

    sport_select = st.selectbox("Select Sport", ['MLB', 'NBA', 'NFL'])

    # Add file uploaders to your app
    col1, col2, col3 = st.columns(3)

    with col1:
        st.subheader("Contest File")
        st.info("Go ahead and upload a Contest file here. Only include player columns and an optional 'Stack' column if you are playing MLB.")
        Contest_file = st.file_uploader("Upload Contest File (CSV or Excel)", type=['csv', 'xlsx', 'xls'])

        # Drop the previous parse so the contest is rebuilt from the current upload.
        if 'Contest' in st.session_state:
            del st.session_state['Contest']

        if Contest_file:
            contest_df, ownership_dict, actual_dict, entry_list = load_contest_file(Contest_file, sport_select)

            # Guard BEFORE cleaning: the `is not None` check below implies the
            # loader can hand back None (presumably on a failed parse), and
            # calling .dropna() on None would raise AttributeError.
            if contest_df is not None:
                # Remove fully-empty rows and renumber so row positions are contiguous.
                contest_df = contest_df.dropna(how='all').reset_index(drop=True)

            st.session_state['Contest'] = contest_df
            st.session_state['ownership_dict'] = ownership_dict
            st.session_state['actual_dict'] = actual_dict
            st.session_state['entry_list'] = entry_list

            if contest_df is not None:
                st.success('Contest file loaded successfully!')
                st.dataframe(contest_df.head(10))

    with col2:
        st.subheader("Projections File")
        st.info("upload a projections file that has 'player_names', 'salary', 'median', 'ownership', and 'captain ownership' (Needed for Showdown) columns. Note that the salary for showdown needs to be the FLEX salary, not the captain salary.")

        # Create two columns for the uploader and template button
        upload_col, template_col = st.columns([3, 1])

        with upload_col:
            projections_file = st.file_uploader("Upload Projections File (CSV or Excel)", type=['csv', 'xlsx', 'xls'])
            # Same reset as for the contest: projections are rebuilt on every run.
            if 'projections_df' in st.session_state:
                del st.session_state['projections_df']

        with template_col:
            # Empty frame with the required headers, offered as a starter CSV.
            template_df = pd.DataFrame(columns=['player_names', 'position', 'team', 'salary', 'median', 'ownership', 'captain ownership'])
            st.download_button(
                label="Template",
                data=template_df.to_csv(index=False),
                file_name="projections_template.csv",
                mime="text/csv"
            )

        if projections_file:
            export_projections, projections = load_file(projections_file)
            if projections is not None:
                st.success('Projections file loaded successfully!')
                st.dataframe(projections.head(10))

    # Name matching needs both uploads to have parsed successfully.
    if Contest_file and projections_file:
        if st.session_state['Contest'] is not None and projections is not None:
            st.subheader("Name Matching functions")

            # NOTE(review): 'projections_df' is deleted above on every run, so
            # this branch (including name matching) executes on each rerun
            # while both files are present, not just the first time.
            if 'projections_df' not in st.session_state:
                projections_df = projections.copy()
                # Salaries may arrive as text with thousands separators
                # ("5,400"); normalise them to plain integers.
                projections_df['salary'] = (
                    projections_df['salary']
                    .astype(str)
                    .str.replace(',', '')
                    .astype(float)
                    .astype(int)
                )
                st.session_state['projections_df'] = projections_df

                # Reconcile player names between the contest and the projections.
                st.session_state['Contest'], st.session_state['projections_df'] = find_name_mismatches(
                    st.session_state['Contest'],
                    st.session_state['projections_df'],
                )
def _player_map(names, values):
    """Build a ``{player_name: value}`` lookup from two aligned sequences."""
    return dict(zip(names, values))


def _dominant_stack(players, team_map):
    """Return ``(team, count)`` for the team appearing most often in a lineup.

    Players missing from ``team_map`` and blank or non-string teams are
    ignored. A lineup with no known team yields ``('', 0)`` so the
    ``stack_size`` column stays numeric.
    """
    teams = [team_map.get(player, '') for player in players]
    teams = [team for team in teams if isinstance(team, str) and team]
    if not teams:
        return '', 0
    return Counter(teams).most_common(1)[0]


def _lineup_total(players, lookup, captain_lookup=None):
    """Sum ``lookup`` values over a lineup; unknown players contribute 0.

    When ``captain_lookup`` is given, the first player is valued from it and
    only the remaining players are valued from ``lookup``.
    """
    if captain_lookup is None or not players:
        return sum(lookup.get(player, 0) for player in players)
    captain, *flex = players
    return captain_lookup.get(captain, 0) + sum(lookup.get(player, 0) for player in flex)


def _lineup_key(players, captain_first=False):
    """Return a string identifying a lineup regardless of slot order.

    With ``captain_first`` the first slot stays distinct (``CPT|a,b,c``), so
    two lineups with the same players but different captains do not collide.
    Values are stringified so a missing (NaN) slot cannot break the sort.
    """
    names = [str(player) for player in players]
    if captain_first and names:
        return names[0] + '|' + ','.join(sorted(names[1:]))
    return ','.join(sorted(names))


def _shift_page(step):
    """Button callback: move the pager by ``step``; clamped on the next run."""
    st.session_state.current_page += step


with tab2:
    if st.button('Clear data', key='reset3'):
        st.session_state.clear()

    # NOTE(review): the captured source lost its indentation; the nesting
    # below is reconstructed so that the analysis renders on every rerun.
    if 'Contest' in st.session_state and 'projections_df' in st.session_state:
        col1, col2 = st.columns([1, 8])
        excluded_cols = ['BaseName', 'EntryCount']
        contest_df = st.session_state['Contest']
        projections_df = st.session_state['projections_df']
        # Every non-bookkeeping column is treated as a roster slot.
        player_columns = [col for col in contest_df.columns if col not in excluded_cols]

        # Create mapping dictionaries
        names = projections_df['player_names']
        map_dict = {
            'pos_map': _player_map(names, projections_df['position']),
            'team_map': _player_map(names, projections_df['team']),
            'salary_map': _player_map(names, projections_df['salary']),
            'proj_map': _player_map(names, projections_df['median']),
            'own_map': _player_map(names, projections_df['ownership']),
            'own_percent_rank': _player_map(names, projections_df['ownership'].rank(pct=True)),
            # Captain salary uses the same figure as the FLEX salary here.
            'cpt_salary_map': _player_map(names, projections_df['salary']),
            'cpt_proj_map': _player_map(names, projections_df['median'] * 1.5),
            'cpt_own_map': _player_map(names, projections_df['captain ownership']),
        }

        # Create a copy of the dataframe for calculations
        working_df = contest_df.copy()

        with col1:
            with st.expander("Info and filters"):
                with st.form(key='filter_form'):
                    type_var = st.selectbox("Select Game Type", ['Classic', 'Showdown'])
                    entry_parse_var = st.selectbox("Do you want to view a specific player(s) or a group of players?", ['All', 'Specific'])
                    entry_names = st.multiselect("Select players", options=st.session_state['entry_list'], default=[])
                    submitted = st.form_submit_button("Submit")

        # Initialize pagination in session state if not exists
        if 'current_page' not in st.session_state:
            st.session_state.current_page = 0
        if submitted:
            # A new filter changes the result set; start again at page one.
            st.session_state.current_page = 0

        # Form widgets keep returning their last submitted values on later
        # reruns, so the filter is driven by those values instead of by the
        # one-run `submitted` flag; otherwise a rerun triggered by the
        # pagination buttons would silently drop the filter.
        if entry_parse_var == 'Specific' and entry_names:
            working_df = working_df[working_df['BaseName'].isin(entry_names)].copy()

        # One tuple of players per lineup, taken from the roster columns only.
        # Iterating whole rows would also feed BaseName/EntryCount and the
        # metric columns added below into the player lookups.
        lineups = list(working_df[player_columns].itertuples(index=False, name=None))
        is_showdown = type_var == 'Showdown'

        # Calculate metrics based on game type
        stacks = [_dominant_stack(lineup, map_dict['team_map']) for lineup in lineups]
        working_df['stack'] = [team for team, _ in stacks]
        working_df['stack_size'] = [size for _, size in stacks]

        # In Showdown the first roster column is treated as the captain slot
        # and valued from the captain maps; Classic values all slots alike.
        cpt_salary = map_dict['cpt_salary_map'] if is_showdown else None
        cpt_proj = map_dict['cpt_proj_map'] if is_showdown else None
        cpt_own = map_dict['cpt_own_map'] if is_showdown else None

        working_df['salary'] = [_lineup_total(lineup, map_dict['salary_map'], cpt_salary) for lineup in lineups]
        working_df['median'] = [_lineup_total(lineup, map_dict['proj_map'], cpt_proj) for lineup in lineups]
        # 'actual' is required by the top-percentile cuts below for BOTH game
        # types. NOTE(review): Showdown uses a plain sum like Classic; if
        # actual_dict stores FLEX-scale points the captain may need the same
        # 1.5x multiplier as cpt_proj_map - confirm against load_contest_file.
        actual_map = st.session_state['actual_dict']
        working_df['actual'] = [_lineup_total(lineup, actual_map) for lineup in lineups]
        working_df['Own'] = [_lineup_total(lineup, map_dict['own_map'], cpt_own) for lineup in lineups]

        # Number of entries sharing the exact same lineup (including itself).
        lineup_keys = [_lineup_key(lineup, captain_first=is_showdown) for lineup in lineups]
        key_counts = Counter(lineup_keys)
        working_df['dupes'] = [key_counts[key] for key in lineup_keys]

        # Snapshot of all entries plus the top 1/5/10/20% by actual score.
        contest_players = working_df.copy()
        players_1per = working_df.nlargest(n=int(len(working_df) * 0.01), columns='actual')
        players_5per = working_df.nlargest(n=int(len(working_df) * 0.05), columns='actual')
        players_10per = working_df.nlargest(n=int(len(working_df) * 0.10), columns='actual')
        players_20per = working_df.nlargest(n=int(len(working_df) * 0.20), columns='actual')

        with st.container():
            # Distinct names so the page-level tab1/tab2 are not shadowed.
            player_tab, stack_tab = st.tabs(['Player Used Info', 'Stack Used Info'])

            with player_tab:
                # How many roster slots each player fills across all entries.
                player_counts = pd.Series(contest_players[player_columns].to_numpy().ravel()).value_counts()
                st.write(player_counts)
                # rename_axis/reset_index(name=...) yields the same column
                # names on pandas 1.x and 2.x (value_counts naming changed).
                player_frame = player_counts.rename_axis('Player').reset_index(name='Count')
                player_frame['Percent'] = player_frame['Count'] / len(working_df)
                player_frame = player_frame[['Player', 'Count', 'Percent']]
                st.dataframe(player_frame)

            with stack_tab:
                # Count entries per stack team; de-duplicating first would
                # make every count equal to 1.
                stack_counts = working_df['stack'].value_counts()
                st.write(stack_counts)
                stack_frame = stack_counts.rename_axis('Stack').reset_index(name='Count')
                stack_frame['Percent'] = stack_frame['Count'] / len(working_df)
                stack_frame = stack_frame[['Stack', 'Count', 'Percent']]
                st.dataframe(stack_frame)

        # Calculate total pages (always at least one, so the label and the
        # "last page" comparison stay sane for an empty result).
        rows_per_page = 500
        total_rows = len(working_df)
        total_pages = max(1, (total_rows + rows_per_page - 1) // rows_per_page)
        last_page = total_pages - 1
        # Clamp: the stored page can be out of range after the data shrinks.
        st.session_state.current_page = min(max(st.session_state.current_page, 0), last_page)
        current_page = st.session_state.current_page

        # Create pagination controls in a single row. Callbacks run before
        # the rerun, so label, disabled states and table all agree.
        pagination_cols = st.columns([4, 1, 1, 1, 4])
        with pagination_cols[1]:
            st.button("← Previous", disabled=current_page == 0, on_click=_shift_page, args=(-1,))
        with pagination_cols[2]:
            st.markdown(f"**Page {current_page + 1} of {total_pages}**", unsafe_allow_html=True)
        with pagination_cols[3]:
            st.button("Next →", disabled=current_page == last_page, on_click=_shift_page, args=(1,))

        # Calculate start and end indices for current page
        start_idx = current_page * rows_per_page
        end_idx = min(start_idx + rows_per_page, total_rows)

        # Display the paginated dataframe
        st.dataframe(
            working_df.iloc[start_idx:end_idx].style
            .background_gradient(axis=0)
            .background_gradient(cmap='RdYlGn')
            .format(precision=2),
            height=1000,
            use_container_width=True,
            hide_index=True
        )