File size: 24,205 Bytes
58cea02 d765ee8 58cea02 9c7e08b 58cea02 8e43993 d04558f 58cea02 e560f1d 58cea02 265f036 58cea02 1689df1 5db8a23 59693da 5db8a23 59693da 58cea02 1689df1 58cea02 356c7d4 90735b6 c74ec2d 90735b6 58cea02 6d04e58 2df0c40 76d511e f575676 2c57866 e24862c 2df0c40 e24862c 2df0c40 59dc088 9da8f46 59dc088 2c57866 6d04e58 e79a7d9 59dc088 d765ee8 18b59a2 d765ee8 18b59a2 d765ee8 59dc088 d765ee8 18b59a2 d765ee8 18b59a2 d765ee8 59dc088 c74ec2d 59dc088 c74ec2d 59dc088 76d511e 59dc088 16fbcab 6db62f0 16fbcab 59dc088 76d511e 59dc088 76d511e 59dc088 6d04e58 59dc088 6d04e58 59dc088 6d04e58 59dc088 76d511e 59dc088 16fbcab 6db62f0 16fbcab f978d14 a19edd8 9c82393 a19edd8 daa425d 9c82393 dd71678 7daa093 9c82393 dd71678 9da8f46 dd71678 a19edd8 daa425d 7daa093 a19edd8 9da8f46 a19edd8 857c2eb a19edd8 9c82393 a19edd8 37d804e a19edd8 f978d14 db00ed7 f978d14 51da0a5 b439c13 51da0a5 f978d14 2804dab 30aeb4e 2804dab 30aeb4e 2804dab 30aeb4e 2804dab 30aeb4e 2804dab 30aeb4e d342b82 2804dab 30aeb4e 2804dab 30aeb4e 2804dab 30aeb4e 2804dab 30aeb4e 2804dab 30aeb4e 2804dab db00ed7 51da0a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 |
import streamlit as st
st.set_page_config(layout="wide")
import numpy as np
import pandas as pd
import time
from fuzzywuzzy import process
from collections import Counter
## import global functions
from global_func.clean_player_name import clean_player_name
from global_func.load_contest_file import load_contest_file
from global_func.load_file import load_file
from global_func.load_ss_file import load_ss_file
from global_func.find_name_mismatches import find_name_mismatches
from global_func.predict_dupes import predict_dupes
from global_func.highlight_rows import highlight_changes, highlight_changes_winners, highlight_changes_losers
from global_func.load_csv import load_csv
from global_func.find_csv_mismatches import find_csv_mismatches
# Percentage display formats keyed by exposure column name.
player_exposure_format = {'Exposure Overall': '{:.2%}', 'Exposure Top 1%': '{:.2%}', 'Exposure Top 5%': '{:.2%}', 'Exposure Top 10%': '{:.2%}', 'Exposure Top 20%': '{:.2%}'}

tab1, tab2 = st.tabs(["Data Load", "Contest Analysis"])

# ---------------------------------------------------------------------------
# Data Load tab: upload a contest file and a projections file, preview both,
# then run name matching and stash the results in st.session_state under
# 'Contest' and 'projections_df'.
# ---------------------------------------------------------------------------
with tab1:
    if st.button('Clear data', key='reset1'):
        st.session_state.clear()

    sport_select = st.selectbox("Select Sport", ['MLB', 'NBA', 'NFL'])

    # Add file uploaders to your app
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Contest File")
        st.info("Go ahead and upload a Contest file here. Only include player columns and an optional 'Stack' column if you are playing MLB.")
        Contest_file = st.file_uploader("Upload Contest File (CSV or Excel)", type=['csv', 'xlsx', 'xls'])

        # Discard the previously matched contest; it is rebuilt further down
        # from whatever is currently uploaded.
        if 'Contest' in st.session_state:
            del st.session_state['Contest']

        if Contest_file:
            contest_base, ownership_df, fpts_df, st.session_state['entry_list'] = load_contest_file(Contest_file, sport_select)
            # Guard BEFORE touching the frame: calling dropna() on a None
            # result would raise AttributeError.
            if contest_base is not None:
                contest_base = contest_base.dropna(how='all').reset_index(drop=True)
                st.success('Contest file loaded successfully!')
                st.dataframe(contest_base.head(10))

    with col2:
        st.subheader("Projections File")
        st.info("upload a projections file that has 'player_names', 'salary', 'median', 'ownership', and 'captain ownership' (Needed for Showdown) columns. Note that the salary for showdown needs to be the FLEX salary, not the captain salary.")

        # Create two columns for the uploader and template button
        upload_col, template_col = st.columns([3, 1])

        with upload_col:
            projections_file = st.file_uploader("Upload Projections File (CSV or Excel)", type=['csv', 'xlsx', 'xls'])
            # Same reset as for the contest: rebuilt below from the current upload.
            if 'projections_df' in st.session_state:
                del st.session_state['projections_df']

        with template_col:
            # Create empty DataFrame with required columns
            template_df = pd.DataFrame(columns=['player_names', 'position', 'team', 'salary', 'median', 'ownership', 'captain ownership'])
            # Add download button for template
            st.download_button(
                label="Template",
                data=template_df.to_csv(index=False),
                file_name="projections_template.csv",
                mime="text/csv"
            )

        if projections_file:
            export_projections, projections = load_file(projections_file)
            if projections is not None:
                st.success('Projections file loaded successfully!')
                st.dataframe(projections.head(10))

    # Run name matching only when both uploads are present, both loaded
    # successfully, and nothing has been stored for this run yet.
    # `contest_base` / `projections` are only bound when the matching file was
    # uploaded, so the file checks must stay first in this short-circuit chain.
    if (
        Contest_file
        and projections_file
        and 'Contest' not in st.session_state
        and 'projections_df' not in st.session_state
        and contest_base is not None
        and projections is not None
    ):
        st.subheader("Name Matching functions")
        st.session_state['Contest'], st.session_state['projections_df'], ownership_dict, actual_dict = find_name_mismatches(contest_base, projections, ownership_df, fpts_df)
        # Normalise salaries such as "5,400" or "5400.0" to plain integers.
        st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
# Finish-percentile buckets reported in the exposure tables: (label, cutoff).
_EXPOSURE_BUCKETS = (('Top 1%', 0.01), ('Top 5%', 0.05), ('Top 10%', 0.10), ('Top 20%', 0.20))


def _dominant_team(players, team_map):
    """Return ``(team, count)`` for the most frequent mapped team in *players*.

    Players missing from *team_map* are ignored. When no player maps to a
    truthy team, ``('', '')`` is returned.
    """
    teams = [team_map.get(player, '') for player in players]
    if not any(teams):
        return '', ''
    return Counter(team for team in teams if team != '').most_common(1)[0]


def _lineup_total(players, lookup):
    """Sum ``lookup[player]`` over *players*, counting unknown players as 0."""
    return sum(lookup.get(player, 0) for player in players)


def _showdown_total(players, captain_lookup, flex_lookup):
    """Total for a showdown lineup: first slot is the captain, the rest are flex."""
    return captain_lookup.get(players[0], 0) + _lineup_total(players[1:], flex_lookup)


def _exposure_table(ranked_df, overall_df, value_columns, key_name):
    """Build one exposure table with an 'Exposure <bucket>' column per bucket.

    Args:
        ranked_df: lineups carrying a 'percentile_finish' column; the percentile
            buckets are cut from this frame and its length is the denominator of
            the 'Overall' column.
        overall_df: lineups whose values are counted for the 'Overall' column.
        value_columns: columns whose values are counted (player slots or 'stack').
        key_name: name of the key column in the result ('Player' or 'Stack').

    Returns:
        A DataFrame with *key_name* plus one exposure column per bucket,
        outer-joined so that a value missing from a bucket shows NaN there.
    """
    buckets = [('Overall', overall_df, len(ranked_df))]
    for label, cutoff in _EXPOSURE_BUCKETS:
        subset = ranked_df[ranked_df['percentile_finish'] <= cutoff]
        buckets.append((label, subset, len(subset)))

    table = None
    for label, subset, denominator in buckets:
        column = f'Exposure {label}'
        # dtype=object keeps the merge key type stable even for empty buckets.
        values = pd.Series(subset[value_columns].to_numpy().ravel(), dtype=object)
        # rename_axis/reset_index(name=...) does not depend on how the installed
        # pandas version names the result of value_counts().
        exposure = values.value_counts().rename_axis(key_name).reset_index(name='Count')
        exposure[column] = exposure['Count'] / denominator
        exposure = exposure[[key_name, column]]
        table = exposure if table is None else pd.merge(table, exposure, on=key_name, how='outer')
    return table


def _show_exposure(table):
    """Render an exposure table sorted by overall exposure, shown as percentages."""
    numeric_columns = table.select_dtypes(include=['number']).columns
    st.dataframe(
        table.sort_values(by='Exposure Overall', ascending=False)
        .style.background_gradient(cmap='RdYlGn')
        .format(formatter='{:.2%}', subset=numeric_columns),
        hide_index=True
    )


# ---------------------------------------------------------------------------
# Contest Analysis tab: per-lineup metrics, a paginated lineup table, and
# player / stack exposure tables. Only rendered once the Data Load tab has
# stored 'Contest' and 'projections_df' in st.session_state.
# ---------------------------------------------------------------------------
with tab2:
    if 'Contest' in st.session_state and 'projections_df' in st.session_state:
        filter_col, _ = st.columns([1, 8])

        excluded_cols = ['BaseName', 'EntryCount']
        player_columns = [col for col in st.session_state['Contest'].columns if col not in excluded_cols]
        for col in player_columns:
            st.session_state['Contest'][col] = st.session_state['Contest'][col].astype(str)

        # Create mapping dictionaries
        projections_df = st.session_state['projections_df']
        names = projections_df['player_names']
        map_dict = {
            'pos_map': dict(zip(names, projections_df['position'])),
            'team_map': dict(zip(names, projections_df['team'])),
            'salary_map': dict(zip(names, projections_df['salary'])),
            'proj_map': dict(zip(names, projections_df['median'])),
            'own_map': dict(zip(names, projections_df['ownership'])),
            'own_percent_rank': dict(zip(names, projections_df['ownership'].rank(pct=True))),
            'cpt_salary_map': dict(zip(names, projections_df['salary'])),
            'cpt_proj_map': dict(zip(names, projections_df['median'] * 1.5)),
            # 'captain ownership' is only needed for Showdown, so a Classic
            # projections file without that column must not crash here.
            'cpt_own_map': (
                dict(zip(names, projections_df['captain ownership']))
                if 'captain ownership' in projections_df.columns else {}
            )
        }

        # Create a copy of the dataframe for calculations
        working_df = st.session_state['Contest'].copy()

        with filter_col:
            with st.expander("Info and filters"):
                if st.button('Clear data', key='reset3'):
                    st.session_state.clear()
                with st.form(key='filter_form'):
                    type_var = st.selectbox("Select Game Type", ['Classic', 'Showdown'])
                    entry_parse_var = st.selectbox("Do you want to view a specific player(s) or a group of players?", ['All', 'Specific'])
                    # .get(): 'entry_list' is gone if "Clear data" was pressed
                    # earlier in this same run.
                    entry_names = st.multiselect("Select players", options=st.session_state.get('entry_list', []), default=[])
                    # The exposure tables are rebuilt on every run, so submitting
                    # needs no explicit cache invalidation.
                    st.form_submit_button("Submit")

        # Apply entry name filter if specific entries are selected
        # NOTE(review): filtering happens before 'dupes' and 'percentile_finish'
        # are computed, so both are relative to the filtered entries rather than
        # the whole contest — confirm this is intended.
        if entry_parse_var == 'Specific' and entry_names:
            working_df = working_df[working_df['BaseName'].isin(entry_names)].copy()

        # One tuple of player names per lineup, in player-column order. Metrics
        # are computed from these so that BaseName / EntryCount / derived
        # columns can never be mistaken for players.
        lineups = list(working_df[player_columns].itertuples(index=False, name=None))

        # Calculate metrics based on game type
        if type_var == 'Classic':
            # NOTE(review): stacks use positions 4+ of the contest frame —
            # presumably skipping BaseName, EntryCount and two leading roster
            # slots; confirm for every supported sport.
            stack_rows = working_df.iloc[:, 4:].itertuples(index=False, name=None)
            stack_info = [_dominant_team(players, map_dict['team_map']) for players in stack_rows]
            working_df['stack'] = [team for team, _ in stack_info]
            working_df['stack_size'] = [size for _, size in stack_info]
            working_df['salary'] = [_lineup_total(players, map_dict['salary_map']) for players in lineups]
            working_df['median'] = [_lineup_total(players, map_dict['proj_map']) for players in lineups]
            working_df['actual_fpts'] = [_lineup_total(players, actual_dict) for players in lineups]
            working_df['Own'] = [_lineup_total(players, map_dict['own_map']) for players in lineups]
            working_df['actual_own'] = [_lineup_total(players, ownership_dict) for players in lineups]
            # Slot order is irrelevant in Classic: sort names to detect duplicates.
            lineup_keys = [','.join(sorted(players)) for players in lineups]
        else:  # 'Showdown'
            stack_info = [_dominant_team(players, map_dict['team_map']) for players in lineups]
            working_df['stack'] = [team for team, _ in stack_info]
            working_df['stack_size'] = [size for _, size in stack_info]
            # The captain is the first PLAYER column, matching the duplicate key below.
            working_df['salary'] = [_showdown_total(players, map_dict['cpt_salary_map'], map_dict['salary_map']) for players in lineups]
            working_df['median'] = [_showdown_total(players, map_dict['cpt_proj_map'], map_dict['proj_map']) for players in lineups]
            working_df['Own'] = [_showdown_total(players, map_dict['cpt_own_map'], map_dict['own_map']) for players in lineups]
            # Captain kept separate; flex slots are order-independent.
            lineup_keys = [players[0] + '|' + ','.join(sorted(players[1:])) for players in lineups]

        key_counts = Counter(lineup_keys)
        working_df['dupes'] = [key_counts[key] for key in lineup_keys]

        # Row position in the contest file is used as the finishing order.
        working_df = working_df.reset_index()
        working_df['percentile_finish'] = working_df['index'].rank(pct=True)
        working_df = working_df.drop(columns='index')

        # Initialize pagination in session state if not exists
        if 'current_page' not in st.session_state:
            st.session_state.current_page = 1

        # Calculate total pages (always at least one, even for an empty frame)
        rows_per_page = 500
        total_rows = len(working_df)
        total_pages = max((total_rows + rows_per_page - 1) // rows_per_page, 1)

        # Create pagination controls in a single row
        pagination_cols = st.columns([4, 1, 1, 1, 4])
        with pagination_cols[1]:
            if st.button("Previous Page"):
                st.session_state.current_page -= 1
        with pagination_cols[3]:
            if st.button("Next Page"):
                st.session_state.current_page += 1
        # Clamp so neither button (nor a shrinking filter) can leave the valid range.
        st.session_state.current_page = min(max(st.session_state.current_page, 1), total_pages)

        # Calculate start and end indices for current page
        start_idx = (st.session_state.current_page - 1) * rows_per_page
        end_idx = min(st.session_state.current_page * rows_per_page, total_rows)

        st.dataframe(
            working_df.iloc[start_idx:end_idx].style
            .background_gradient(axis=0)
            .background_gradient(cmap='RdYlGn')
            .format(precision=2),
            height=500,
            use_container_width=True,
            hide_index=True
        )

        # 'All' counts every lineup for the Overall column; otherwise only the
        # lineups belonging to the selected entries.
        if entry_parse_var == 'All':
            overall_df = working_df
        else:
            overall_df = working_df[working_df['BaseName'].isin(entry_names)]

        with st.container():
            player_tab, stack_tab, dupe_tab = st.tabs(['Player Used Info', 'Stack Used Info', 'Duplication Info'])

            with player_tab:
                # Rebuilt from scratch on every run: merging onto a table cached
                # from an earlier run would duplicate its exposure columns.
                st.session_state['player_frame'] = _exposure_table(working_df, overall_df, player_columns, 'Player')
                _show_exposure(st.session_state['player_frame'])

            with stack_tab:
                st.session_state['stack_frame'] = _exposure_table(working_df, overall_df, ['stack'], 'Stack')
                _show_exposure(st.session_state['stack_frame'])

            with dupe_tab:
                st.write('holding')
|