James McCool
Update salary and actual fantasy points calculations in app.py to apply a 1.5x multiplier for the first player in each row. This change enhances the accuracy of player evaluations in the data processing workflow.
abd1ae1
import streamlit as st | |
st.set_page_config(layout="wide") | |
import numpy as np | |
import pandas as pd | |
from rapidfuzz import process, fuzz | |
from collections import Counter | |
from pymongo.mongo_client import MongoClient | |
from pymongo.server_api import ServerApi | |
from datetime import datetime | |
def init_conn():
    """Connect to MongoDB and return the contest database handle.

    The connection string is read from Streamlit secrets under the
    ``'mongo_uri'`` key. The returned object is the ``'Contest_Information'``
    database of the client created from that URI.
    """
    mongo_client = MongoClient(
        st.secrets['mongo_uri'],
        retryWrites=True,
        serverSelectionTimeoutMS=500000,
    )
    return mongo_client['Contest_Information']
def grab_contest_names(db, sport, type):
    """Load the contest listing for one sport / game type.

    Args:
        db: Mapping-like database handle; ``db[name]`` must return an object
            with a ``find()`` method yielding one dict per contest.
        sport: Sport prefix used in the collection name (e.g. ``'MLB'``).
        type: Game type, ``'Classic'`` or ``'Showdown'``. (The name shadows
            the builtin but is kept because it is part of the signature.)

    Returns:
        A tuple ``(contest_names, contest_id_map, curr_info)``:
        the ``'Contest Name'`` column, a ``{contest name: contest id}`` dict,
        and the full frame (without ``'_id'``) with an added ``'Date'``
        column formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If ``type`` is not a supported game type.
    """
    # NOTE(review): Showdown maps to 'sd' here but to 'showdown' in
    # grab_contest_player_info -- confirm both collection names are intended.
    suffix_by_type = {'Classic': 'reg', 'Showdown': 'sd'}
    if type not in suffix_by_type:
        # Previously this fell through and failed later with UnboundLocalError.
        raise ValueError(f"Unsupported game type: {type!r}")
    db_type = suffix_by_type[type]

    collection = db[f'{sport}_{db_type}_contest_info']
    curr_info = pd.DataFrame(list(collection.find())).drop('_id', axis=1)

    # Column assignment aligns on the index, so sorting the values first (as
    # the earlier version did) never changed the stored order; parse directly.
    curr_info['Date'] = pd.to_datetime(curr_info['Contest Date']).dt.strftime('%Y-%m-%d')

    contest_names = curr_info['Contest Name']
    contest_id_map = dict(zip(curr_info['Contest Name'], curr_info['Contest ID']))
    return contest_names, contest_id_map, curr_info
def grab_contest_player_info(db, sport, type, contest_date, contest_name, contest_id_map):
    """Load player rows for one contest and build per-player lookup dicts.

    Args:
        db: Mapping-like database handle; ``db[name]`` must return an object
            with a ``find()`` method yielding one dict per player row.
        sport: Sport prefix used in the collection name (e.g. ``'MLB'``).
        type: Game type, ``'Classic'`` or ``'Showdown'``. (The name shadows
            the builtin but is kept because it is part of the signature.)
        contest_date: Value compared for equality against the
            ``'Contest Date'`` column.
        contest_name: Key looked up in ``contest_id_map``.
        contest_id_map: ``{contest name: contest id}`` mapping.

    Returns:
        A tuple ``(player_info, info_maps)``. ``player_info`` has one row per
        player (the lowest-salary row wins when a player appears more than
        once). ``info_maps`` holds ``position_dict``, ``salary_dict``,
        ``team_dict``, ``opp_dict`` and ``fpts_avg_dict``, each keyed by
        player name.

    Raises:
        ValueError: If ``type`` is not a supported game type.
    """
    suffix_by_type = {'Classic': 'reg', 'Showdown': 'showdown'}
    if type not in suffix_by_type:
        # Previously this fell through and failed later with UnboundLocalError.
        raise ValueError(f"Unsupported game type: {type!r}")
    db_type = suffix_by_type[type]

    collection = db[f'{sport}_{db_type}_player_info']
    player_info = pd.DataFrame(list(collection.find())).drop('_id', axis=1)
    player_info = player_info[player_info['Contest Date'] == contest_date]

    try:
        player_info = player_info[player_info['Contest ID'] == contest_id_map[contest_name]]
    except KeyError:
        # Best effort, as before: an unknown contest name or a missing
        # 'Contest ID' column keeps the date-only filter. Only KeyError is
        # swallowed now, so unrelated failures are no longer hidden.
        pass

    player_info = player_info.rename(columns={'Display Name': 'Player'})
    # Ascending salary + keep='first' retains the cheapest row per player.
    player_info = player_info.sort_values(by='Salary', ascending=True).drop_duplicates(subset='Player', keep='first')

    players = player_info['Player']
    info_maps = {
        'position_dict': dict(zip(players, player_info['Position'])),
        'salary_dict': dict(zip(players, player_info['Salary'])),
        'team_dict': dict(zip(players, player_info['Team'])),
        'opp_dict': dict(zip(players, player_info['Opp'])),
        'fpts_avg_dict': dict(zip(players, player_info['Avg FPTS'])),
    }
    return player_info, info_maps
# Module-level database handle (the return value of init_conn); it is passed to
# grab_contest_names and grab_contest_player_info further down in this script.
db = init_conn() | |
## import global functions | |
from global_func.load_contest_file import load_contest_file | |
from global_func.create_player_exposures import create_player_exposures | |
from global_func.create_stack_exposures import create_stack_exposures | |
from global_func.create_stack_size_exposures import create_stack_size_exposures | |
from global_func.create_general_exposures import create_general_exposures | |
from global_func.grab_contest_data import grab_contest_data | |
def is_valid_input(file):
    """Tell whether ``file`` holds usable input.

    A pandas DataFrame is valid when it is non-empty; any other object
    (for example a Streamlit uploader object) is valid when it is not None.
    """
    if not isinstance(file, pd.DataFrame):
        # For Streamlit uploader objects
        return file is not None
    return not file.empty
# Percent format strings keyed by exposure column name.
# NOTE(review): not referenced anywhere else in the visible source -- confirm it is still needed.
player_exposure_format = {'Exposure Overall': '{:.2%}', 'Exposure Top 1%': '{:.2%}', 'Exposure Top 5%': '{:.2%}', 'Exposure Top 10%': '{:.2%}', 'Exposure Top 20%': '{:.2%}'} | |
# Two top-level tabs: one for loading data, one for the contest analysis.
tab1, tab2 = st.tabs(["Data Load", "Contest Analysis"]) | |
# NOTE(review): leading indentation is missing in this copy of the file, so the nesting of the
# statements below cannot be read from the text. Comments that mention nesting are inferences
# to be confirmed against the original file.
with tab1: | |
col1, col2 = st.columns(2) | |
with col1: | |
# Reset button: clears everything stored in st.session_state.
if st.button('Clear data', key='reset1'): | |
st.session_state.clear() | |
search_options, sport_options, date_options = st.columns(3) | |
with search_options: | |
parse_type = st.selectbox("Manual upload or DB search?", ['DB Search', 'Manual'], key='parse_type') | |
with sport_options: | |
sport_select = st.selectbox("Select Sport", ['MLB', 'MMA', 'GOLF'], key='sport_select') | |
type_var = st.selectbox("Select Game Type", ['Classic', 'Showdown'], key='type_var') | |
# Contest listing for the chosen sport / game type, fetched through grab_contest_names.
contest_names, contest_id_map, curr_info = grab_contest_names(db, sport_select, type_var) | |
with date_options: | |
# Unique contest dates, newest first, with today's date removed from the choices.
date_list = curr_info['Date'].sort_values(ascending=False).unique() | |
date_list = date_list[date_list != pd.Timestamp.today().strftime('%Y-%m-%d')] | |
date_select = st.selectbox("Select Date", date_list, key='date_select') | |
# date_select2 is the calendar day after the selected date.
date_select2 = (pd.to_datetime(date_select) + pd.Timedelta(days=1)).strftime('%Y-%m-%d') | |
# Contest names on the selected date (computed before the dashes are stripped below).
name_parse = curr_info[curr_info['Date'] == date_select]['Contest Name'].reset_index(drop=True) | |
# Both dates are turned into dash-free strings (YYYYMMDD) for the loaders.
date_select = date_select.replace('-', '') | |
date_select2 = date_select2.replace('-', '') | |
contest_name_var = st.selectbox("Select Contest to load", name_parse) | |
# DB Search path. Any stored 'Contest_file_helper' / 'Contest_file' entry is deleted first, so
# the `not in` check that follows is always true when it is reached right after the deletes.
if parse_type == 'DB Search': | |
if 'Contest_file_helper' in st.session_state: | |
del st.session_state['Contest_file_helper'] | |
if 'Contest_file' in st.session_state: | |
del st.session_state['Contest_file'] | |
if 'Contest_file' not in st.session_state: | |
# On click: load player info + lookup maps, then the contest data itself.
if st.button('Load Contest Data', key='load_contest_data'): | |
st.session_state['player_info'], st.session_state['info_maps'] = grab_contest_player_info(db, sport_select, type_var, date_select, contest_name_var, contest_id_map) | |
st.session_state['Contest_file'] = grab_contest_data(sport_select, contest_name_var, contest_id_map, date_select, date_select2) | |
else: | |
pass | |
with col2: | |
# NOTE(review): contest_id_map[contest_name_var] raises KeyError when the selected name is not a
# key of the map (e.g. no contest selectable for the date) -- confirm that cannot happen.
st.info(f"If you are manually loading and do not have the results CSV for the contest you selected, you can find it here: https://www.draftkings.com/contest/gamecenter/{contest_id_map[contest_name_var]}#/") | |
# Manual upload path; mirrors the delete-then-check sequence of the DB Search path.
if parse_type == 'Manual': | |
if 'Contest_file_helper' in st.session_state: | |
del st.session_state['Contest_file_helper'] | |
if 'Contest_file' in st.session_state: | |
del st.session_state['Contest_file'] | |
if 'Contest_file' not in st.session_state: | |
# NOTE(review): the uploader accepts csv/xlsx/xls, but the stored value is passed straight to
# pd.read_csv with no check that a file was provided (Streamlit documents None before an upload)
# and no Excel branch -- confirm / guard.
st.session_state['Contest_upload'] = st.file_uploader("Upload Contest File (CSV or Excel)", type=['csv', 'xlsx', 'xls']) | |
st.session_state['player_info'], st.session_state['info_maps'] = grab_contest_player_info(db, sport_select, type_var, date_select, contest_name_var, contest_id_map) | |
st.session_state['Contest_file'] = pd.read_csv(st.session_state['Contest_upload']) | |
else: | |
pass | |
# Once a raw contest file is present, parse it with load_contest_file and store the pieces.
# check_lineups is assigned here but not used again in the visible source.
if 'Contest_file' in st.session_state: | |
st.session_state['Contest'], st.session_state['ownership_df'], st.session_state['actual_df'], st.session_state['entry_list'], check_lineups = load_contest_file(st.session_state['Contest_file'], type_var, st.session_state['player_info'], sport_select) | |
# Drop rows that are entirely empty and renumber the index from 0.
st.session_state['Contest'] = st.session_state['Contest'].dropna(how='all') | |
st.session_state['Contest'] = st.session_state['Contest'].reset_index(drop=True) | |
# NOTE(review): the two method calls above would already have failed on None, so this check
# looks like it can never be False at this point.
if st.session_state['Contest'] is not None: | |
st.success('Contest file loaded successfully!') | |
st.dataframe(st.session_state['Contest'].head(100)) | |
if 'Contest_file' in st.session_state: | |
# Player -> ownership and player -> actual fantasy points lookups.
st.session_state['ownership_dict'] = dict(zip(st.session_state['ownership_df']['Player'], st.session_state['ownership_df']['Own'])) | |
st.session_state['actual_dict'] = dict(zip(st.session_state['actual_df']['Player'], st.session_state['actual_df']['FPTS'])) | |
# Without a helper file the salary/team/position lookups come from info_maps; otherwise they
# are built from salary_df / team_df / pos_df, which are not assigned anywhere in the visible source.
if 'Contest_file_helper' not in st.session_state: | |
st.session_state['salary_dict'] = st.session_state['info_maps']['salary_dict'] | |
st.session_state['team_dict'] = st.session_state['info_maps']['team_dict'] | |
st.session_state['pos_dict'] = st.session_state['info_maps']['position_dict'] | |
else: | |
st.session_state['salary_dict'] = dict(zip(st.session_state['salary_df']['Player'], st.session_state['salary_df']['Salary'])) | |
st.session_state['team_dict'] = dict(zip(st.session_state['team_df']['Player'], st.session_state['team_df']['Team'])) | |
st.session_state['pos_dict'] = dict(zip(st.session_state['pos_df']['Player'], st.session_state['pos_df']['Pos'])) | |
# Renders the salary lookup as a table.
st.table(st.session_state['salary_dict']) | |
# Contest Analysis tab. NOTE(review): indentation is missing in this copy of the file; the
# statements below presumably nest under `with tab2:` / `if 'Contest' in ...:` -- confirm.
with tab2: | |
# Columns of the contest table that are not treated as player columns.
excluded_cols = ['BaseName', 'EntryCount'] | |
if 'Contest' in st.session_state: | |
player_columns = [col for col in st.session_state['Contest'].columns if col not in excluded_cols] | |
# Cast every player column to str; this writes back into the frame held in session state.
for col in player_columns: | |
st.session_state['Contest'][col] = st.session_state['Contest'][col].astype(str) | |
# Create mapping dictionaries | |
# 'own_percent_rank' maps each player to the percentile rank of their 'Own' value.
map_dict = { | |
'pos_map': st.session_state['pos_dict'], | |
'team_map': st.session_state['team_dict'], | |
'salary_map': st.session_state['salary_dict'], | |
'own_map': st.session_state['ownership_dict'], | |
'own_percent_rank': dict(zip(st.session_state['ownership_df']['Player'], st.session_state['ownership_df']['Own'].rank(pct=True))) | |
} | |
# Create a copy of the dataframe for calculations | |
working_df = st.session_state['Contest'].copy() | |
# Per-lineup metrics added to working_df, in this order: stack, stack_size, salary,
# actual_fpts, actual_own, dupes, percentile_finish, finish.
#
# Classic and Showdown share every step except:
#   * where the team-stack scan starts (column 4 for Classic, column 2 for Showdown);
#   * how salary / actual points are totalled (Showdown weights the player in
#     column 2 by 1.5x and adds columns 3 onward once; Classic sums the whole row).
#
# Positional access goes through .iloc: plain row[2] on a Series indexed by column
# names relies on the positional fallback that pandas 2.x deprecates.
if type_var in ('Classic', 'Showdown'):
    is_showdown = type_var == 'Showdown'
    stack_start = 2 if is_showdown else 4
    team_map = map_dict['team_map']
    salary_map = map_dict['salary_map']
    actual_map = st.session_state['actual_dict']
    own_map = st.session_state['ownership_dict']

    def _top_team(row, field):
        """Return the most common team (field=0) or its count (field=1); '' if no team maps."""
        teams = [team_map.get(player, '') for player in row.iloc[stack_start:]]
        if not any(teams):
            return ''
        return Counter(team for team in teams if team != '').most_common(1)[0][field]

    def _lineup_total(row, lookup):
        """Sum `lookup` values over a row, weighting column 2 by 1.5x for Showdown."""
        if is_showdown:
            return (lookup.get(row.iloc[2], 0) * 1.5) + sum(
                lookup.get(player, 0) for player in row.iloc[3:]
            )
        return sum(lookup.get(player, 0) for player in row)

    # Column order matters: each apply() sees the columns added before it, exactly
    # as in the original per-branch code.
    working_df['stack'] = working_df.apply(lambda row: _top_team(row, 0), axis=1)
    working_df['stack_size'] = working_df.apply(lambda row: _top_team(row, 1), axis=1)
    working_df['salary'] = working_df.apply(lambda row: _lineup_total(row, salary_map), axis=1)
    working_df['actual_fpts'] = working_df.apply(lambda row: _lineup_total(row, actual_map), axis=1)
    # Ownership is an unweighted sum over the whole row for both game types.
    working_df['actual_own'] = working_df.apply(
        lambda row: sum(own_map.get(player, 0) for player in row),
        axis=1
    )

    # Lineups made of the same players in any order share one sorted key.
    working_df['sorted'] = working_df[player_columns].apply(
        lambda row: ','.join(sorted(row.values)),
        axis=1
    )
    working_df['dupes'] = working_df.groupby('sorted').transform('size')

    # The row position in the contest table is used as the finishing position.
    working_df = working_df.reset_index()
    working_df['percentile_finish'] = working_df['index'].rank(pct=True)
    working_df['finish'] = working_df['index']
    working_df = working_df.drop(['sorted', 'index'], axis=1)
# Player and stack exposures computed on working_df before any entry filter is applied.
st.session_state['field_player_frame'] = create_player_exposures(working_df, player_columns) | |
st.session_state['field_stack_frame'] = create_stack_exposures(working_df) | |
with st.expander("Info and filters"): | |
# Second reset button; it uses a different widget key than the one on the Data Load tab.
if st.button('Clear data', key='reset3'): | |
st.session_state.clear() | |
with st.form(key='filter_form'): | |
entry_parse_var = st.selectbox("Do you want to view a specific player(s) or a group of players?", ['All', 'Specific']) | |
entry_names = st.multiselect("Select players", options=st.session_state['entry_list'], default=[]) | |
submitted = st.form_submit_button("Submit") | |
# On submit, previously stored player/stack exposure frames are removed from session state.
if submitted: | |
if 'player_frame' in st.session_state: | |
del st.session_state['player_frame'] | |
if 'stack_frame' in st.session_state: | |
del st.session_state['stack_frame'] | |
# Apply entry name filter if specific entries are selected | |
# NOTE(review): with indentation lost it is unclear whether this filter sits inside
# `if submitted:`. If it does, working_df is only narrowed on the run triggered by the
# submit click -- confirm the intended behaviour.
if entry_parse_var == 'Specific' and entry_names: | |
working_df = working_df[working_df['BaseName'].isin(entry_names)] | |
# Pagination for the lineup table: state lives in st.session_state.current_page (1-based).
if 'current_page' not in st.session_state:
    st.session_state.current_page = 1

# Page count by ceiling division.
rows_per_page = 500
total_rows = len(working_df)
total_pages = (total_rows + rows_per_page - 1) // rows_per_page
# An empty table still has one (empty) page, so the clamps below are well-defined.
last_page = max(total_pages, 1)

# Pagination controls in a single row; the buttons sit in the 2nd and 4th columns.
pagination_cols = st.columns([4, 1, 1, 1, 4])
with pagination_cols[1]:
    if st.button("Previous Page"):
        st.session_state.current_page = max(st.session_state.current_page - 1, 1)
        # Exposure frames stored for the previous page are discarded.
        for cached_key in ('player_frame', 'stack_frame'):
            if cached_key in st.session_state:
                del st.session_state[cached_key]
with pagination_cols[3]:
    if st.button("Next Page"):
        # Fix: the page number used to grow without bound, leaving an empty table
        # once it passed the last page.
        st.session_state.current_page = min(st.session_state.current_page + 1, last_page)
        for cached_key in ('player_frame', 'stack_frame'):
            if cached_key in st.session_state:
                del st.session_state[cached_key]

# A filter applied since the page was chosen can shrink the table, so bring a stale
# page number back into range before slicing.
st.session_state.current_page = min(max(st.session_state.current_page, 1), last_page)

# Start and end row positions for the current page.
start_idx = (st.session_state.current_page - 1) * rows_per_page
end_idx = min(st.session_state.current_page * rows_per_page, total_rows)
# Current page of the lineup table. Two background_gradient calls are chained: the first uses
# the default colormap along axis 0, the second applies 'RdYlGn'.
st.dataframe( | |
working_df.iloc[start_idx:end_idx].style | |
.background_gradient(axis=0) | |
.background_gradient(cmap='RdYlGn') | |
.format(precision=2), | |
height=500, | |
use_container_width=True, | |
hide_index=True | |
) | |
with st.container(): | |
# NOTE(review): tab1/tab2 are rebound here, shadowing the top-level tab handles created
# near the top of the script -- consider distinct names.
tab1, tab2, tab3, tab4 = st.tabs(['Player Used Info', 'Stack Used Info', 'Stack Size Info', 'General Info']) | |
# --- Player exposure sub-tab ---
with tab1: | |
col1, col2 = st.columns(2) | |
with col1: | |
pos_var = st.selectbox("Which position(s) would you like to view?", ['All', 'Specific'], key='pos_var') | |
with col2: | |
# NOTE(review): the position choices are fixed to this list regardless of the selected
# sport (the sport selector also offers MMA and GOLF) -- confirm.
if pos_var == 'Specific': | |
pos_select = st.multiselect("Select your position(s)", ['P', 'C', '1B', '2B', '3B', 'SS', 'OF'], key='pos_select') | |
else: | |
pos_select = None | |
# The 'All' branch and the else branch below are identical except that the else branch
# passes entry_names to create_player_exposures.
if entry_parse_var == 'All': | |
st.session_state['player_frame'] = create_player_exposures(working_df, player_columns) | |
hold_frame = st.session_state['player_frame'].copy() | |
# GOLF gets the constant position 'G'; other sports map player -> position.
if sport_select == 'GOLF': | |
hold_frame['Pos'] = 'G' | |
else: | |
hold_frame['Pos'] = hold_frame['Player'].map(map_dict['pos_map']) | |
# 'Pos' becomes the second column; rows with no position are dropped.
st.session_state['player_frame'].insert(1, 'Pos', hold_frame['Pos']) | |
st.session_state['player_frame'] = st.session_state['player_frame'].dropna(subset=['Pos']) | |
# Keep rows whose 'Pos' value contains any selected position (`in` test on each value).
if pos_select: | |
position_mask = st.session_state['player_frame']['Pos'].apply(lambda x: any(pos in x for pos in pos_select)) | |
st.session_state['player_frame'] = st.session_state['player_frame'][position_mask] | |
# Sorted by overall exposure; numeric columns from the third column on are shown as percentages.
st.dataframe(st.session_state['player_frame']. | |
sort_values(by='Exposure Overall', ascending=False). | |
style.background_gradient(cmap='RdYlGn'). | |
format(formatter='{:.2%}', subset=st.session_state['player_frame'].iloc[:, 2:].select_dtypes(include=['number']).columns), | |
hide_index=True) | |
else: | |
st.session_state['player_frame'] = create_player_exposures(working_df, player_columns, entry_names) | |
hold_frame = st.session_state['player_frame'].copy() | |
if sport_select == 'GOLF': | |
hold_frame['Pos'] = 'G' | |
else: | |
hold_frame['Pos'] = hold_frame['Player'].map(map_dict['pos_map']) | |
st.session_state['player_frame'].insert(1, 'Pos', hold_frame['Pos']) | |
st.session_state['player_frame'] = st.session_state['player_frame'].dropna(subset=['Pos']) | |
if pos_select: | |
position_mask = st.session_state['player_frame']['Pos'].apply(lambda x: any(pos in x for pos in pos_select)) | |
st.session_state['player_frame'] = st.session_state['player_frame'][position_mask] | |
st.dataframe(st.session_state['player_frame']. | |
sort_values(by='Exposure Overall', ascending=False). | |
style.background_gradient(cmap='RdYlGn'). | |
format(formatter='{:.2%}', subset=st.session_state['player_frame'].iloc[:, 2:].select_dtypes(include=['number']).columns), | |
hide_index=True) | |
# --- Stack exposure sub-tab: same All / entry_names split as above ---
with tab2: | |
if entry_parse_var == 'All': | |
st.session_state['stack_frame'] = create_stack_exposures(working_df) | |
st.dataframe(st.session_state['stack_frame']. | |
sort_values(by='Exposure Overall', ascending=False). | |
style.background_gradient(cmap='RdYlGn'). | |
format(formatter='{:.2%}', subset=st.session_state['stack_frame'].iloc[:, 1:].select_dtypes(include=['number']).columns), | |
hide_index=True) | |
else: | |
st.session_state['stack_frame'] = create_stack_exposures(working_df, entry_names) | |
st.dataframe(st.session_state['stack_frame']. | |
sort_values(by='Exposure Overall', ascending=False). | |
style.background_gradient(cmap='RdYlGn'). | |
format(formatter='{:.2%}', subset=st.session_state['stack_frame'].iloc[:, 1:].select_dtypes(include=['number']).columns), | |
hide_index=True) | |
# --- Stack size exposure sub-tab ---
with tab3: | |
if entry_parse_var == 'All': | |
st.session_state['stack_size_frame'] = create_stack_size_exposures(working_df) | |
st.dataframe(st.session_state['stack_size_frame']. | |
sort_values(by='Exposure Overall', ascending=False). | |
style.background_gradient(cmap='RdYlGn'). | |
format(formatter='{:.2%}', subset=st.session_state['stack_size_frame'].iloc[:, 1:].select_dtypes(include=['number']).columns), | |
hide_index=True) | |
else: | |
st.session_state['stack_size_frame'] = create_stack_size_exposures(working_df, entry_names) | |
st.dataframe(st.session_state['stack_size_frame']. | |
sort_values(by='Exposure Overall', ascending=False). | |
style.background_gradient(cmap='RdYlGn'). | |
format(formatter='{:.2%}', subset=st.session_state['stack_size_frame'].iloc[:, 1:].select_dtypes(include=['number']).columns), | |
hide_index=True) | |
# --- General info sub-tab: gradient applied along axis 1, values shown with 2 decimals ---
with tab4: | |
if entry_parse_var == 'All': | |
st.session_state['general_frame'] = create_general_exposures(working_df) | |
st.dataframe(st.session_state['general_frame'].style.background_gradient(cmap='RdYlGn', axis=1).format(precision=2), hide_index=True) | |
else: | |
st.session_state['general_frame'] = create_general_exposures(working_df, entry_names) | |
st.dataframe(st.session_state['general_frame'].style.background_gradient(cmap='RdYlGn', axis=1).format(precision=2), hide_index=True) | |