|
import streamlit as st |
|
st.set_page_config(layout="wide") |
|
import numpy as np |
|
import pandas as pd |
|
from rapidfuzz import process, fuzz |
|
from collections import Counter |
|
from pymongo.mongo_client import MongoClient |
|
from pymongo.server_api import ServerApi |
|
from datetime import datetime |
|
|
|
def init_conn():
    """Open the MongoDB client and return the Contest_Information database.

    Reads the connection URI from Streamlit secrets; the generous server
    selection timeout tolerates slow cold starts on the hosted cluster.
    """
    mongo_uri = st.secrets['mongo_uri']
    mongo_client = MongoClient(mongo_uri, retryWrites=True, serverSelectionTimeoutMS=500000)
    return mongo_client['Contest_Information']
|
|
|
def grab_contest_names(db, sport, type):
    """Load contest metadata for a sport/game type and build display labels.

    Args:
        db: Mongo database handle (collections addressed by name).
        sport: sport code used in the collection name, e.g. 'MLB'.
        type: 'Classic' or 'Showdown' (mapped to the 'reg'/'sd' collections).

    Returns:
        (contest_names, curr_info): a Series of '<Contest Name> - <YYYY-MM-DD>'
        labels and the full contest-info DataFrame (with a formatted 'Date'
        column added).

    Raises:
        ValueError: if ``type`` is not 'Classic' or 'Showdown'.
    """
    if type == 'Classic':
        db_type = 'reg'
    elif type == 'Showdown':
        db_type = 'sd'
    else:
        # Previously an unknown type left db_type unbound and crashed with
        # NameError on the f-string below; fail explicitly instead.
        raise ValueError(f"Unknown game type: {type!r}")

    collection = db[f'{sport}_{db_type}_contest_info']
    curr_info = pd.DataFrame(list(collection.find())).drop('_id', axis=1)

    # NOTE: the original sorted 'Contest Date' before conversion, but column
    # assignment realigns on index, making the sort a no-op — removed.
    curr_info['Date'] = pd.to_datetime(curr_info['Contest Date']).dt.strftime('%Y-%m-%d')
    contest_names = curr_info['Contest Name'] + ' - ' + curr_info['Date']

    return contest_names, curr_info
|
|
|
def grab_contest_player_info(db, sport, type, contest_date, contest_name, contest_id_map):
    """Fetch per-player info for one contest date and build lookup maps.

    Args:
        db: Mongo database handle (collections addressed by name).
        sport: sport code used in the collection name, e.g. 'MLB'.
        type: 'Classic' or 'Showdown'.
        contest_date: date string matched exactly against 'Contest Date'.
        contest_name: unused here; kept for interface compatibility.
        contest_id_map: unused here; kept for interface compatibility.

    Returns:
        (player_info, info_maps): the de-duplicated player DataFrame and a
        dict of player-keyed lookup maps (position, salary, team, opp,
        average FPTS).

    Raises:
        ValueError: if ``type`` is not 'Classic' or 'Showdown'.
    """
    if type == 'Classic':
        db_type = 'reg'
    elif type == 'Showdown':
        # NOTE(review): grab_contest_names maps Showdown to 'sd' but this maps
        # to 'showdown' — presumably the player-info collections really are
        # named differently; confirm against the database.
        db_type = 'showdown'
    else:
        # Previously an unknown type left db_type unbound and crashed with
        # NameError on the f-string below; fail explicitly instead.
        raise ValueError(f"Unknown game type: {type!r}")

    collection = db[f'{sport}_{db_type}_player_info']
    player_info = pd.DataFrame(list(collection.find())).drop('_id', axis=1)
    player_info = player_info[player_info['Contest Date'] == contest_date]
    player_info = player_info.rename(columns={'Display Name': 'Player'})
    # One row per player: ascending salary sort + keep='first' retains the
    # LOWEST-salary record when a player appears more than once.
    player_info = player_info.sort_values(by='Salary', ascending=True).drop_duplicates(subset='Player', keep='first')

    info_maps = {
        'position_dict': dict(zip(player_info['Player'], player_info['Position'])),
        'salary_dict': dict(zip(player_info['Player'], player_info['Salary'])),
        'team_dict': dict(zip(player_info['Player'], player_info['Team'])),
        'opp_dict': dict(zip(player_info['Player'], player_info['Opp'])),
        'fpts_avg_dict': dict(zip(player_info['Player'], player_info['Avg FPTS']))
    }

    return player_info, info_maps
|
|
|
# Open the shared MongoDB handle once at script start; used by both tabs below.
db = init_conn()
|
|
|
|
|
from global_func.load_contest_file import load_contest_file |
|
from global_func.create_player_exposures import create_player_exposures |
|
from global_func.create_stack_exposures import create_stack_exposures |
|
from global_func.create_stack_size_exposures import create_stack_size_exposures |
|
from global_func.create_general_exposures import create_general_exposures |
|
from global_func.grab_contest_data import grab_contest_data |
|
|
|
def is_valid_input(file):
    """Return True when *file* is usable: a non-empty DataFrame, or any non-None value."""
    if isinstance(file, pd.DataFrame):
        return not file.empty
    return file is not None
|
|
|
# Styler format maps: render exposure and duplication-rate columns as percentages.
player_exposure_format = {'Exposure Overall': '{:.2%}', 'Exposure Top 1%': '{:.2%}', 'Exposure Top 5%': '{:.2%}', 'Exposure Top 10%': '{:.2%}', 'Exposure Top 20%': '{:.2%}'}
dupe_format = {'uniques%': '{:.2%}', 'under_5%': '{:.2%}', 'under_10%': '{:.2%}'}
|
|
|
tab1, tab2 = st.tabs(["Data Load", "Contest Analysis"])
with tab1:
    col1, col2 = st.columns(2)

    with col1:
        if st.button('Clear data', key='reset1'):
            st.session_state.clear()
        search_options, sport_options, date_options = st.columns(3)
        with search_options:
            parse_type = st.selectbox("Manual upload or DB search?", ['Manual', 'DB Search'], key='parse_type')
        with sport_options:
            sport_select = st.selectbox("Select Sport", ['MLB', 'MMA', 'GOLF', 'NBA', 'NHL'], key='sport_select')
            type_var = st.selectbox("Select Game Type", ['Classic', 'Showdown'], key='type_var')
            try:
                contest_names, curr_info = grab_contest_names(db, sport_select, type_var)
            # Was a bare `except:` — that also swallows SystemExit/KeyboardInterrupt.
            except Exception:
                st.error("No contests found for this sport and/or game type")
                st.stop()

        with date_options:
            # Newest first; exclude today's slate (results presumably not final yet).
            date_list = curr_info['Date'].sort_values(ascending=False).unique()
            date_list = date_list[date_list != pd.Timestamp.today().strftime('%Y-%m-%d')]
            date_select = st.selectbox("Select Date", date_list, key='date_select')
            # Day after the selection — presumably an exclusive upper bound for
            # the DB query; confirm against grab_contest_data.
            date_select2 = (pd.to_datetime(date_select) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')

        name_parse = curr_info[curr_info['Date'] == date_select]['Contest Name'].reset_index(drop=True)
        contest_id_map = dict(zip(name_parse, curr_info[curr_info['Date'] == date_select]['Contest ID']))
        # Collapse to YYYYMMDD to match the stored date format.
        date_select = date_select.replace('-', '')
        date_select2 = date_select2.replace('-', '')

        contest_name_var = st.selectbox("Select Contest to load", name_parse)
        if parse_type == 'DB Search':
            # Force a fresh pull each run in DB-search mode.
            if 'Contest_file_helper' in st.session_state:
                del st.session_state['Contest_file_helper']
            if 'Contest_file' in st.session_state:
                del st.session_state['Contest_file']
            # NOTE(review): always true here — the key was just deleted above.
            if 'Contest_file' not in st.session_state:
                if st.button('Load Contest Data', key='load_contest_data'):
                    st.session_state['player_info'], st.session_state['info_maps'] = grab_contest_player_info(db, sport_select, type_var, date_select, contest_name_var, contest_id_map)
                    st.session_state['Contest_file'] = grab_contest_data(sport_select, contest_name_var, contest_id_map, date_select, date_select2)
            else:
                pass

    with col2:
        st.info(f"If you are manually loading and do not have the results CSV for the contest you selected, you can find it here: https://www.draftkings.com/contest/gamecenter/{contest_id_map[contest_name_var]}#/")
        if parse_type == 'Manual':
            if 'Contest_file_helper' in st.session_state:
                del st.session_state['Contest_file_helper']
            if 'Contest_file' in st.session_state:
                del st.session_state['Contest_file']
            # NOTE(review): always true here — the key was just deleted above.
            if 'Contest_file' not in st.session_state:
                st.session_state['Contest_upload'] = st.file_uploader("Upload Contest File (CSV or Excel)", type=['csv', 'xlsx', 'xls'])
                st.session_state['player_info'], st.session_state['info_maps'] = grab_contest_player_info(db, sport_select, type_var, date_select, contest_name_var, contest_id_map)
                try:
                    st.session_state['Contest_file'] = pd.read_csv(st.session_state['Contest_upload'])
                # Was a bare `except:`; the uploader returns None until a file is chosen.
                except Exception:
                    st.warning('Please upload a Contest CSV')
            else:
                pass

    if 'Contest_file' in st.session_state:
        # Parse the raw contest export into lineup/ownership/actual-score frames.
        st.session_state['Contest'], st.session_state['ownership_df'], st.session_state['actual_df'], st.session_state['entry_list'], check_lineups = load_contest_file(st.session_state['Contest_file'], type_var, st.session_state['player_info'], sport_select)
        st.session_state['Contest'] = st.session_state['Contest'].dropna(how='all')
        st.session_state['Contest'] = st.session_state['Contest'].reset_index(drop=True)
        if st.session_state['Contest'] is not None:
            st.success('Contest file loaded successfully!')
            st.dataframe(st.session_state['Contest'].head(100))

    if 'Contest_file' in st.session_state:
        # Flatten helper frames into quick player-keyed dicts for the analysis tab.
        st.session_state['ownership_dict'] = dict(zip(st.session_state['ownership_df']['Player'], st.session_state['ownership_df']['Own']))
        st.session_state['actual_dict'] = dict(zip(st.session_state['actual_df']['Player'], st.session_state['actual_df']['FPTS']))
        st.session_state['salary_dict'] = st.session_state['info_maps']['salary_dict']
        st.session_state['team_dict'] = st.session_state['info_maps']['team_dict']
        st.session_state['pos_dict'] = st.session_state['info_maps']['position_dict']
|
with tab2:
    # Meta columns that identify the entrant rather than lineup slots.
    excluded_cols = ['BaseName', 'EntryCount']
    if 'Contest' in st.session_state:
        player_columns = [col for col in st.session_state['Contest'].columns if col not in excluded_cols]
        # Stringify every lineup slot so dict lookups and sorting below behave
        # uniformly (missing players become the literal string 'nan').
        for col in player_columns:
            st.session_state['Contest'][col] = st.session_state['Contest'][col].astype(str)

        # Player-keyed lookup maps assembled during data load.
        map_dict = {
            'pos_map': st.session_state['pos_dict'],
            'team_map': st.session_state['team_dict'],
            'salary_map': st.session_state['salary_dict'],
            'own_map': st.session_state['ownership_dict'],
            'own_percent_rank': dict(zip(st.session_state['ownership_df']['Player'], st.session_state['ownership_df']['Own'].rank(pct=True)))
        }

        # Work on a copy so the cached Contest frame stays pristine.
        working_df = st.session_state['Contest'].copy()
|
|
|
        if type_var == 'Classic':
            # Primary stack = most common team among lineup slots; row[4:]
            # presumably skips BaseName/EntryCount plus two other leading
            # columns — TODO confirm against the Classic file layout.
            working_df['stack'] = working_df.apply(
                lambda row: Counter(
                    map_dict['team_map'].get(player, '') for player in row[4:]
                    if map_dict['team_map'].get(player, '') != ''
                ).most_common(1)[0][0] if any(map_dict['team_map'].get(player, '') for player in row[4:]) else '',
                axis=1
            )
            # Count of players on that most common team ('' when nothing maps).
            working_df['stack_size'] = working_df.apply(
                lambda row: Counter(
                    map_dict['team_map'].get(player, '') for player in row[4:]
                    if map_dict['team_map'].get(player, '') != ''
                ).most_common(1)[0][1] if any(map_dict['team_map'].get(player, '') for player in row[4:]) else '',
                axis=1
            )
            # These sums iterate every cell in the row; non-player cells simply
            # miss the dict and contribute 0.
            working_df['salary'] = working_df.apply(lambda row: sum(map_dict['salary_map'].get(player, 0) for player in row), axis=1)
            working_df['actual_fpts'] = working_df.apply(lambda row: sum(st.session_state['actual_dict'].get(player, 0) for player in row), axis=1)
            working_df['actual_own'] = working_df.apply(lambda row: sum(st.session_state['ownership_dict'].get(player, 0) for player in row), axis=1)
            # Order-independent lineup key so identical lineups hash together.
            working_df['sorted'] = working_df[player_columns].apply(
                lambda row: ','.join(sorted(row.values)),
                axis=1
            )
            # Every row of a duplicated lineup gets that lineup's total count.
            working_df['dupes'] = working_df.groupby('sorted').transform('size')

            # Per-user counts of unique / lightly-duplicated lineups, broadcast
            # back onto each of the user's rows via reindex on BaseName.
            working_df['uniques'] = working_df.groupby('BaseName').apply(
                lambda x: (x['dupes'] == 1).sum()
            ).reindex(working_df['BaseName']).values

            working_df['under_5'] = working_df.groupby('BaseName').apply(
                lambda x: (x['dupes'] <= 5).sum()
            ).reindex(working_df['BaseName']).values

            working_df['under_10'] = working_df.groupby('BaseName').apply(
                lambda x: (x['dupes'] <= 10).sum()
            ).reindex(working_df['BaseName']).values

            # Original row order is contest finish order; capture it as rank
            # and percentile before dropping the helper columns.
            working_df = working_df.reset_index()
            working_df['percentile_finish'] = working_df['index'].rank(pct=True)
            working_df['finish'] = working_df['index']
            working_df = working_df.drop(['sorted', 'index'], axis=1)
|
|
|
        elif type_var == 'Showdown':
            # Showdown lineups start at row[2] (after BaseName/EntryCount);
            # row[2] is presumably the captain slot — TODO confirm layout.
            working_df['stack'] = working_df.apply(
                lambda row: Counter(
                    map_dict['team_map'].get(player, '') for player in row[2:]
                    if map_dict['team_map'].get(player, '') != ''
                ).most_common(1)[0][0] if any(map_dict['team_map'].get(player, '') for player in row[2:]) else '',
                axis=1
            )
            # Count of players on that most common team ('' when nothing maps).
            working_df['stack_size'] = working_df.apply(
                lambda row: Counter(
                    map_dict['team_map'].get(player, '') for player in row[2:]
                    if map_dict['team_map'].get(player, '') != ''
                ).most_common(1)[0][1] if any(map_dict['team_map'].get(player, '') for player in row[2:]) else '',
                axis=1
            )

            # Captain (row[2]) costs 1.5x; assumes the salary map stores base
            # (non-captain) salaries — TODO confirm.
            working_df['salary'] = working_df.apply(
                lambda row: (map_dict['salary_map'].get(row[2], 0) * 1.5) +
                            sum(map_dict['salary_map'].get(player, 0) for player in row[3:]),
                axis=1
            )

            # Captain also scores 1.5x fantasy points.
            working_df['actual_fpts'] = working_df.apply(
                lambda row: (st.session_state['actual_dict'].get(row[2], 0) * 1.5) +
                            sum(st.session_state['actual_dict'].get(player, 0) for player in row[3:]),
                axis=1
            )
            # Ownership is summed flat across all cells (no captain multiplier).
            working_df['actual_own'] = working_df.apply(lambda row: sum(st.session_state['ownership_dict'].get(player, 0) for player in row), axis=1)
            # Order-independent lineup key so identical lineups hash together.
            working_df['sorted'] = working_df[player_columns].apply(
                lambda row: ','.join(sorted(row.values)),
                axis=1
            )
            # Every row of a duplicated lineup gets that lineup's total count.
            working_df['dupes'] = working_df.groupby('sorted').transform('size')

            # Per-user counts of unique / lightly-duplicated lineups, broadcast
            # back onto each of the user's rows via reindex on BaseName.
            working_df['uniques'] = working_df.groupby('BaseName').apply(
                lambda x: (x['dupes'] == 1).sum()
            ).reindex(working_df['BaseName']).values

            working_df['under_5'] = working_df.groupby('BaseName').apply(
                lambda x: (x['dupes'] <= 5).sum()
            ).reindex(working_df['BaseName']).values

            working_df['under_10'] = working_df.groupby('BaseName').apply(
                lambda x: (x['dupes'] <= 10).sum()
            ).reindex(working_df['BaseName']).values

            # Original row order is contest finish order; capture it as rank
            # and percentile before dropping the helper columns.
            working_df = working_df.reset_index()
            working_df['percentile_finish'] = working_df['index'].rank(pct=True)
            working_df['finish'] = working_df['index']
            working_df = working_df.drop(['sorted', 'index'], axis=1)
|
|
|
        # Field-wide (pre-filter) exposures, kept for reference/comparison.
        st.session_state['field_player_frame'] = create_player_exposures(working_df, player_columns)
        st.session_state['field_stack_frame'] = create_stack_exposures(working_df)
|
|
|
        with st.expander("Info and filters"):
            st.info("Note that any filtering here needs to be reset manually, i.e. if you parse down the specific users and want to reset the table, just backtrack your filtering by setting it back to 'All'")
            if st.button('Clear data', key='reset3'):
                st.session_state.clear()

            # All filter widgets live in one form so the table only recomputes
            # on Submit; widget values persist across reruns via their keys.
            with st.form(key='filter_form'):
                users_var, entries_var, stack_var, stack_size_var, player_var = st.columns(5)
                with users_var:
                    entry_parse_var = st.selectbox("Do you want to view a specific user(s)?", ['All', 'Specific'], key = 'entry_parse_var')
                    entry_names = st.multiselect("Select players", options=st.session_state['entry_list'], default=[], key = 'entry_names')
                with entries_var:
                    low_entries_var = st.number_input("Low end of entries range", min_value=0, max_value=150, value=1, key = 'low_entries_var')
                    high_entries_var = st.number_input("High end of entries range", min_value=0, max_value=150, value=150, key = 'high_entries_var')
                with stack_var:
                    stack_parse_var = st.selectbox("Do you want to view lineups with specific team(s)?", ['All', 'Specific'], key = 'stack_parse_var')
                    stack_names = st.multiselect("Select teams", options=working_df['stack'].unique(), default=[], key = 'stack_names')
                with stack_size_var:
                    stack_size_parse_var = st.selectbox("Do you want to view a specific stack size(s)?", ['All', 'Specific'], key = 'stack_size_parse_var')
                    stack_size_names = st.multiselect("Select stack sizes", options=working_df['stack_size'].unique(), default=[], key = 'stack_size_names')
                with player_var:
                    # Flatten every lineup slot into one de-duplicated player list.
                    unique_players = pd.unique(working_df[player_columns].values.ravel('K'))
                    unique_players = [p for p in unique_players if p != 'nan']
                    player_parse_var = st.selectbox("Do you want to view lineups with specific player(s)?", ['All', 'Specific'], key = 'player_parse_var')
                    player_names = st.multiselect("Select players", options=unique_players, default=[], key = 'player_names')
                submitted = st.form_submit_button("Submit")
                if submitted:
                    # Drop cached exposure tables so they rebuild with new filters.
                    if 'player_frame' in st.session_state:
                        del st.session_state['player_frame']
                    if 'stack_frame' in st.session_state:
                        del st.session_state['stack_frame']

        # Apply the selected filters to the working copy.
        if entry_parse_var == 'Specific' and entry_names:
            working_df = working_df[working_df['BaseName'].isin(entry_names)]
        if stack_parse_var == 'Specific' and stack_names:
            working_df = working_df[working_df['stack'].isin(stack_names)]
        if stack_size_parse_var == 'Specific' and stack_size_names:
            working_df = working_df[working_df['stack_size'].isin(stack_size_names)]
        if player_parse_var == 'Specific' and player_names:
            # Keep only lineups that contain EVERY selected player.
            mask = working_df[player_columns].apply(lambda row: all(player in row.values for player in player_names), axis=1)
            working_df = working_df[mask]
        if low_entries_var and high_entries_var:
            working_df = working_df[working_df['EntryCount'].between(low_entries_var, high_entries_var)]
|
|
|
|
|
        if 'current_page' not in st.session_state:
            st.session_state.current_page = 1

        # 500-row pages for the full lineup table.
        rows_per_page = 500
        total_rows = len(working_df)
        # Ceiling division; computed but not currently displayed.
        total_pages = (total_rows + rows_per_page - 1) // rows_per_page

        pagination_cols = st.columns([4, 1, 1, 1, 4])
        with pagination_cols[1]:
            if st.button(f"Previous Page"):
                if st.session_state['current_page'] > 1:
                    st.session_state.current_page -= 1
                else:
                    # Already at the first page; clamp to 1.
                    st.session_state.current_page = 1
                # Invalidate cached exposure frames on page change.
                if 'player_frame' in st.session_state:
                    del st.session_state['player_frame']
                if 'stack_frame' in st.session_state:
                    del st.session_state['stack_frame']

        with pagination_cols[3]:
            if st.button(f"Next Page"):
                # NOTE(review): no upper clamp — can advance past total_pages.
                st.session_state.current_page += 1
                if 'player_frame' in st.session_state:
                    del st.session_state['player_frame']
                if 'stack_frame' in st.session_state:
                    del st.session_state['stack_frame']

        # Slice the current page and render with two stacked gradients.
        start_idx = (st.session_state.current_page - 1) * rows_per_page
        end_idx = min((st.session_state.current_page) * rows_per_page, total_rows)
        st.dataframe(
            working_df.iloc[start_idx:end_idx].style
            .background_gradient(axis=0)
            .background_gradient(cmap='RdYlGn')
            .format(precision=2),
            height=500,
            use_container_width=True,
            hide_index=True
        )
|
|
|
        with st.container():
            # Analysis sub-tabs; these local names shadow the outer tab1/tab2.
            tab1, tab2, tab3, tab4, tab5 = st.tabs(['Player Used Info', 'Stack Used Info', 'Stack Size Info', 'General Info', 'Duplication Info'])
            with tab1:
                with st.form(key='player_info_pos_form'):
                    col1, col2 = st.columns(2)
                    with col1:
                        pos_var = st.selectbox("Which position(s) would you like to view?", ['All', 'Specific'], key='pos_var')
                    with col2:
                        pos_select = st.multiselect("Select your position(s)", ['P', 'C', '1B', '2B', '3B', 'SS', 'OF'], key='pos_select')
                    submitted = st.form_submit_button("Submit")
                    if submitted:
                        if pos_var == 'Specific':
                            pos_select = pos_select
                        else:
                            # 'All' disables the position filter below.
                            pos_select = None

                if entry_parse_var == 'All':

                    # Exposures across the whole (filtered) field.
                    st.session_state['player_frame'] = create_player_exposures(working_df, player_columns)
                    hold_frame = st.session_state['player_frame'].copy()
                    if sport_select == 'GOLF':
                        # Golf uses a single roster slot label.
                        hold_frame['Pos'] = 'G'
                    else:
                        hold_frame['Pos'] = hold_frame['Player'].map(map_dict['pos_map'])
                    st.session_state['player_frame'].insert(1, 'Pos', hold_frame['Pos'])
                    st.session_state['player_frame'] = st.session_state['player_frame'].dropna(subset=['Pos'])
                    if pos_select:
                        # Substring match so multi-position strings (e.g. '2B/SS')
                        # hit any selected position.
                        position_mask = st.session_state['player_frame']['Pos'].apply(lambda x: any(pos in x for pos in pos_select))
                        st.session_state['player_frame'] = st.session_state['player_frame'][position_mask]
                    st.dataframe(st.session_state['player_frame'].
                                 sort_values(by='Exposure Overall', ascending=False).
                                 style.background_gradient(cmap='RdYlGn').
                                 format(formatter='{:.2%}', subset=st.session_state['player_frame'].iloc[:, 2:].select_dtypes(include=['number']).columns),
                                 hide_index=True)
                else:

                    # Same flow, restricted to the selected entrants.
                    st.session_state['player_frame'] = create_player_exposures(working_df, player_columns, entry_names)
                    hold_frame = st.session_state['player_frame'].copy()
                    if sport_select == 'GOLF':
                        hold_frame['Pos'] = 'G'
                    else:
                        hold_frame['Pos'] = hold_frame['Player'].map(map_dict['pos_map'])
                    st.session_state['player_frame'].insert(1, 'Pos', hold_frame['Pos'])
                    st.session_state['player_frame'] = st.session_state['player_frame'].dropna(subset=['Pos'])
                    if pos_select:
                        position_mask = st.session_state['player_frame']['Pos'].apply(lambda x: any(pos in x for pos in pos_select))
                        st.session_state['player_frame'] = st.session_state['player_frame'][position_mask]
                    st.dataframe(st.session_state['player_frame'].
                                 sort_values(by='Exposure Overall', ascending=False).
                                 style.background_gradient(cmap='RdYlGn').
                                 format(formatter='{:.2%}', subset=st.session_state['player_frame'].iloc[:, 2:].select_dtypes(include=['number']).columns),
                                 hide_index=True)
            with tab2:

                # Team-stack exposures (field-wide vs. selected entrants).
                if entry_parse_var == 'All':
                    st.session_state['stack_frame'] = create_stack_exposures(working_df)
                    st.dataframe(st.session_state['stack_frame'].
                                 sort_values(by='Exposure Overall', ascending=False).
                                 style.background_gradient(cmap='RdYlGn').
                                 format(formatter='{:.2%}', subset=st.session_state['stack_frame'].iloc[:, 1:].select_dtypes(include=['number']).columns),
                                 hide_index=True)
                else:
                    st.session_state['stack_frame'] = create_stack_exposures(working_df, entry_names)
                    st.dataframe(st.session_state['stack_frame'].
                                 sort_values(by='Exposure Overall', ascending=False).
                                 style.background_gradient(cmap='RdYlGn').
                                 format(formatter='{:.2%}', subset=st.session_state['stack_frame'].iloc[:, 1:].select_dtypes(include=['number']).columns),
                                 hide_index=True)
            with tab3:

                # Stack-size exposures (field-wide vs. selected entrants).
                if entry_parse_var == 'All':
                    st.session_state['stack_size_frame'] = create_stack_size_exposures(working_df)
                    st.dataframe(st.session_state['stack_size_frame'].
                                 sort_values(by='Exposure Overall', ascending=False).
                                 style.background_gradient(cmap='RdYlGn').
                                 format(formatter='{:.2%}', subset=st.session_state['stack_size_frame'].iloc[:, 1:].select_dtypes(include=['number']).columns),
                                 hide_index=True)
                else:
                    st.session_state['stack_size_frame'] = create_stack_size_exposures(working_df, entry_names)
                    st.dataframe(st.session_state['stack_size_frame'].
                                 sort_values(by='Exposure Overall', ascending=False).
                                 style.background_gradient(cmap='RdYlGn').
                                 format(formatter='{:.2%}', subset=st.session_state['stack_size_frame'].iloc[:, 1:].select_dtypes(include=['number']).columns),
                                 hide_index=True)

            with tab4:

                # General summary exposures; row-wise gradient (axis=1).
                if entry_parse_var == 'All':
                    st.session_state['general_frame'] = create_general_exposures(working_df)
                    st.dataframe(st.session_state['general_frame'].style.background_gradient(cmap='RdYlGn', axis=1).format(precision=2), hide_index=True)

                else:
                    st.session_state['general_frame'] = create_general_exposures(working_df, entry_names)
                    st.dataframe(st.session_state['general_frame'].style.background_gradient(cmap='RdYlGn', axis=1).format(precision=2), hide_index=True)

            with tab5:
                with st.form(key='dupe_form'):
                    col1, col2 = st.columns(2)
                    with col1:
                        user_dupe_var = st.selectbox("Which usage(s) would you like to view?", ['All', 'Specific'], key='user_dupe_var')
                    with col2:
                        user_dupe_select = st.multiselect("Select your user(s)", working_df['BaseName'].sort_values().unique(), key='user_dupe_select')
                    submitted = st.form_submit_button("Submit")
                    if submitted:
                        if user_dupe_var == 'Specific':
                            user_dupe_select = user_dupe_select
                        else:
                            # 'All' disables the user filter below.
                            user_dupe_select = None

                # Per-user duplication summary, collapsed to one row per entrant.
                dupe_frame = working_df[['BaseName', 'EntryCount', 'dupes', 'uniques', 'under_5', 'under_10']]
                # NOTE(review): assigning onto this slice can raise
                # SettingWithCopyWarning; a .copy() would silence it.
                # average_dupes is the contest-wide mean, repeated on every row.
                dupe_frame['average_dupes'] = dupe_frame['dupes'].mean()
                dupe_frame['uniques%'] = dupe_frame['uniques'] / dupe_frame['EntryCount']
                dupe_frame['under_5%'] = dupe_frame['under_5'] / dupe_frame['EntryCount']
                dupe_frame['under_10%'] = dupe_frame['under_10'] / dupe_frame['EntryCount']
                dupe_frame = dupe_frame[['BaseName', 'EntryCount', 'average_dupes', 'uniques', 'uniques%', 'under_5', 'under_5%', 'under_10', 'under_10%']].drop_duplicates(subset='BaseName', keep='first')
                st.session_state['duplication_frame'] = dupe_frame.sort_values(by='EntryCount', ascending=False)
                if user_dupe_var == 'Specific':
                    st.session_state['duplication_frame'] = st.session_state['duplication_frame'][st.session_state['duplication_frame']['BaseName'].isin(user_dupe_select)]

                if 'dupe_page' not in st.session_state:
                    st.session_state.dupe_page = 1

                # Smaller pages for the per-user summary table.
                rows_per_page = 50
                total_rows = len(st.session_state['duplication_frame'])
                total_pages = (total_rows + rows_per_page - 1) // rows_per_page

                pagination_cols = st.columns([4, 1, 1, 1, 4])
                with pagination_cols[1]:
                    if st.button(f"Previous Dupes Page"):
                        if st.session_state['dupe_page'] > 1:
                            st.session_state.dupe_page -= 1

                with pagination_cols[3]:
                    if st.button(f"Next Dupes Page"):
                        # NOTE(review): no upper clamp — can advance past total_pages.
                        st.session_state.dupe_page += 1

                start_dupe_idx = (st.session_state.dupe_page - 1) * rows_per_page
                end_dupe_idx = min((st.session_state.dupe_page) * rows_per_page, total_rows)

                st.dataframe(st.session_state['duplication_frame'].iloc[start_dupe_idx:end_dupe_idx].style.
                             background_gradient(cmap='RdYlGn', subset=['uniques%', 'under_5%', 'under_10%'], axis=0).
                             background_gradient(cmap='RdYlGn', subset=['uniques', 'under_5', 'under_10'], axis=0).
                             format(dupe_format, precision=2), hide_index=True)
|
|