import streamlit as st import numpy as np import pandas as pd import time from fuzzywuzzy import process import math from difflib import SequenceMatcher def calculate_weighted_ownership(row_ownerships): """ Calculate weighted ownership based on the formula: (AVERAGE of (each value's average with overall average)) * count - (max - min) Args: row_ownerships: Series containing ownership values in percentage form (e.g., 24.2213 for 24.2213%) Returns: float: Calculated weighted ownership value """ # Drop NaN values and convert percentages to decimals row_ownerships = row_ownerships.dropna() / 100 # Get the mean of all ownership values row_mean = row_ownerships.mean() # Calculate average of each value with the overall mean value_means = [(val + row_mean) / 2 for val in row_ownerships] # Take average of all those means avg_of_means = sum(value_means) / len(row_ownerships) # Multiply by count of values weighted = avg_of_means * (len(row_ownerships) * 1) # Subtract (max - min) weighted = weighted - (row_ownerships.max() - row_ownerships.min()) # Convert back to percentage form to match input format return weighted * 10000 def calculate_player_similarity_score(portfolio, player_columns): """ Calculate a similarity score that measures how different each row is from all other rows based on actual player selection. Converts players to numeric IDs for faster comparison. Higher scores indicate more unique/different lineups. Args: portfolio: DataFrame containing the portfolio data player_columns: List of column names containing player names Returns: Series: Similarity scores for each row """ # Extract player data player_data = portfolio[player_columns].fillna('') # Get all unique players and create a mapping to numeric IDs all_players = set() for col in player_columns: unique_vals = player_data[col].unique() for val in unique_vals: if isinstance(val, str) and val.strip() != '': all_players.add(val) # Create player ID mapping player_to_id = {player: idx for idx, player in enumerate(sorted(all_players))} # Convert each row to a list of player IDs row_ids = [] for _, row in player_data.iterrows(): # Get player IDs for this row, sorted for consistency player_ids = sorted([player_to_id[str(val)] for val in row.values if isinstance(val, str) and str(val).strip() != '' and str(val) in player_to_id]) row_ids.append(player_ids) # Calculate similarity scores using Jaccard distance on player ID sets similarity_scores = [] for i in range(len(portfolio)): distances = [] for j in range(len(portfolio)): if i != j: # Convert to sets for Jaccard calculation set_i = set(row_ids[i]) set_j = set(row_ids[j]) # Calculate Jaccard distance if len(set_i) == 0 and len(set_j) == 0: # Both lineups are empty distance = 0.0 elif len(set_i) == 0 or len(set_j) == 0: # One lineup is empty, other is not distance = 1.0 else: # Jaccard distance = 1 - (intersection / union) intersection = len(set_i & set_j) union = len(set_i | set_j) distance = 1 - (intersection / union) distances.append(distance) # Average distance to all other lineups avg_distance = np.mean(distances) if distances else 0 similarity_scores.append(avg_distance) # Normalize to 0-1 scale where 1 = most unique/different similarity_scores = np.array(similarity_scores) if similarity_scores.max() > similarity_scores.min(): similarity_scores = (similarity_scores - similarity_scores.min()) / (similarity_scores.max() - similarity_scores.min()) return similarity_scores def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var): if strength_var == 'Weak': dupes_multiplier = .75 percentile_multiplier = .90 elif strength_var == 'Average': dupes_multiplier = 1.00 percentile_multiplier = 1.00 elif strength_var == 'Sharp': dupes_multiplier = 1.25 percentile_multiplier = 1.10 max_ownership = max(maps_dict['own_map'].values()) / 100 average_ownership = np.mean(list(maps_dict['own_map'].values())) / 100 if site_var == 'Fanduel': if type_var == 'Showdown': dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank'] own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own'] calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio'] # Get the original player columns (first 5 columns excluding salary, median, Own) player_columns = [col for col in portfolio.columns[:5] if col not in ['salary', 'median', 'Own']] flex_ownerships = pd.concat([ portfolio.iloc[:,1].map(maps_dict['own_map']), portfolio.iloc[:,2].map(maps_dict['own_map']), portfolio.iloc[:,3].map(maps_dict['own_map']), portfolio.iloc[:,4].map(maps_dict['own_map']) ]) flex_rank = flex_ownerships.rank(pct=True) # Assign ranks back to individual columns using the same rank scale portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True) portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100 portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100 portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100 portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100 portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100 portfolio['own_product'] = (portfolio[own_columns].product(axis=1)) portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100 portfolio['own_sum'] = portfolio[own_columns].sum(axis=1) portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1) # Calculate dupes formula portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (60000 - portfolio['Own'])) / 100) - ((60000 - portfolio['salary']) / 100) portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier # Round and handle negative values portfolio['Dupes'] = np.where( np.round(portfolio['dupes_calc'], 0) <= 0, 0, np.round(portfolio['dupes_calc'], 0) - 1 ) elif type_var == 'Classic': num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']]) dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)] own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)] calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio'] # Get the original player columns (first num_players columns excluding salary, median, Own) player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']] for i in range(1, num_players + 1): portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank']) portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100 portfolio['own_product'] = (portfolio[own_columns].product(axis=1)) portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100 portfolio['own_sum'] = portfolio[own_columns].sum(axis=1) portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1) portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (60000 - portfolio['Own'])) / 100) - ((60000 - portfolio['salary']) / 100) portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier # Round and handle negative values portfolio['Dupes'] = np.where( np.round(portfolio['dupes_calc'], 0) <= 0, 0, np.round(portfolio['dupes_calc'], 0) - 1 ) elif site_var == 'Draftkings': if type_var == 'Showdown': dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank'] own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own'] calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio'] # Get the original player columns (first 6 columns excluding salary, median, Own) player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']] flex_ownerships = pd.concat([ portfolio.iloc[:,1].map(maps_dict['own_map']), portfolio.iloc[:,2].map(maps_dict['own_map']), portfolio.iloc[:,3].map(maps_dict['own_map']), portfolio.iloc[:,4].map(maps_dict['own_map']), portfolio.iloc[:,5].map(maps_dict['own_map']) ]) flex_rank = flex_ownerships.rank(pct=True) # Assign ranks back to individual columns using the same rank scale portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True) portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100 portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100 portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100 portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100 portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100 portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']) / 100 portfolio['own_product'] = (portfolio[own_columns].product(axis=1)) portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100 portfolio['own_sum'] = portfolio[own_columns].sum(axis=1) portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1) # Calculate dupes formula portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (50000 - portfolio['Own'])) / 100) - ((50000 - portfolio['salary']) / 100) portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier # Round and handle negative values portfolio['Dupes'] = np.where( np.round(portfolio['dupes_calc'], 0) <= 0, 0, np.round(portfolio['dupes_calc'], 0) - 1 ) elif type_var == 'Classic': if sport_var == 'CS2': dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank'] own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own'] calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio'] # Get the original player columns (first 6 columns excluding salary, median, Own) player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']] flex_ownerships = pd.concat([ portfolio.iloc[:,1].map(maps_dict['own_map']), portfolio.iloc[:,2].map(maps_dict['own_map']), portfolio.iloc[:,3].map(maps_dict['own_map']), portfolio.iloc[:,4].map(maps_dict['own_map']), portfolio.iloc[:,5].map(maps_dict['own_map']) ]) flex_rank = flex_ownerships.rank(pct=True) # Assign ranks back to individual columns using the same rank scale portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True) portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0]) portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100 portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100 portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100 portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100 portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100 portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']) / 100 portfolio['own_product'] = (portfolio[own_columns].product(axis=1)) portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100 portfolio['own_sum'] = portfolio[own_columns].sum(axis=1) portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1) # Calculate dupes formula portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (50000 - portfolio['Own'])) / 100) - ((50000 - portfolio['salary']) / 100) portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier # Round and handle negative values portfolio['Dupes'] = np.where( np.round(portfolio['dupes_calc'], 0) <= 0, 0, np.round(portfolio['dupes_calc'], 0) - 1 ) elif sport_var != 'CS2': num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']]) dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)] own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)] calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio'] # Get the original player columns (first num_players columns excluding salary, median, Own) player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']] for i in range(1, num_players + 1): portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank']) portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100 portfolio['own_product'] = (portfolio[own_columns].product(axis=1)) portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100 portfolio['own_sum'] = portfolio[own_columns].sum(axis=1) portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1) portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (50000 - portfolio['Own'])) / 100) - ((50000 - portfolio['salary']) / 100) portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier # Round and handle negative values portfolio['Dupes'] = np.where( np.round(portfolio['dupes_calc'], 0) <= 0, 0, np.round(portfolio['dupes_calc'], 0) - 1 ) portfolio['Dupes'] = np.round(portfolio['Dupes'], 0) portfolio['own_ratio'] = np.where( portfolio[own_columns].isin([max_ownership]).any(axis=1), portfolio['own_sum'] / portfolio['own_average'], (portfolio['own_sum'] - max_ownership) / portfolio['own_average'] ) percentile_cut_scalar = portfolio['median'].max() # Get scalar value if type_var == 'Classic': if sport_var == 'CS2': own_ratio_nerf = 2 elif sport_var != 'CS2': own_ratio_nerf = 1.5 elif type_var == 'Showdown': own_ratio_nerf = 1.5 portfolio['Finish_percentile'] = portfolio.apply( lambda row: .0005 if (row['own_ratio'] - own_ratio_nerf) / ((10 * (row['median'] / percentile_cut_scalar)) / 2) < .0005 else (row['own_ratio'] - own_ratio_nerf) / ((10 * (row['median'] / percentile_cut_scalar)) / 2), axis=1 ) portfolio['Ref_Proj'] = portfolio['median'].max() portfolio['Max_Proj'] = portfolio['Ref_Proj'] + 10 portfolio['Min_Proj'] = portfolio['Ref_Proj'] - 10 portfolio['Avg_Ref'] = (portfolio['Max_Proj'] + portfolio['Min_Proj']) / 2 portfolio['Win%'] = (((portfolio['median'] / portfolio['Avg_Ref']) - (0.1 + ((portfolio['Ref_Proj'] - portfolio['median'])/100))) / (Contest_Size / 1000)) / 10 max_allowed_win = (1 / Contest_Size) * 5 portfolio['Win%'] = portfolio['Win%'] / portfolio['Win%'].max() * max_allowed_win portfolio['Finish_percentile'] = portfolio['Finish_percentile'] + .005 + (.005 * (Contest_Size / 10000)) portfolio['Finish_percentile'] = portfolio['Finish_percentile'] * percentile_multiplier portfolio['Win%'] = portfolio['Win%'] * (1 - portfolio['Finish_percentile']) portfolio['low_own_count'] = portfolio[own_columns].apply(lambda row: (row < 0.10).sum(), axis=1) portfolio['Finish_percentile'] = portfolio.apply(lambda row: row['Finish_percentile'] if row['low_own_count'] <= 0 else row['Finish_percentile'] / row['low_own_count'], axis=1) portfolio['Lineup Edge'] = portfolio['Win%'] * ((.5 - portfolio['Finish_percentile']) * (Contest_Size / 2.5)) portfolio['Lineup Edge'] = portfolio.apply(lambda row: row['Lineup Edge'] / (row['Dupes'] + 1) if row['Dupes'] > 0 else row['Lineup Edge'], axis=1) portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean() portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership, axis=1) portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns)) # Calculate similarity score based on actual player selection portfolio['Similarity Score'] = calculate_player_similarity_score(portfolio, player_columns) portfolio = portfolio.drop(columns=dup_count_columns) portfolio = portfolio.drop(columns=own_columns) portfolio = portfolio.drop(columns=calc_columns) return portfolio