|
import streamlit as st |
|
import numpy as np |
|
import pandas as pd |
|
import time |
|
from fuzzywuzzy import process |
|
import math |
|
from difflib import SequenceMatcher |
|
|
|
def calculate_weighted_ownership(row_ownerships):
    """Collapse one lineup's ownership values into a single weighted score.

    The computation is::

        scaled   = row_ownerships.dropna() / 100
        weighted = mean((value + mean(scaled)) / 2) * count - (max - min)
        result   = weighted * 10000

    Because the mean of ``(value + mean) / 2`` is the mean itself, this is
    algebraically ``100 * (sum - (max - min))`` in the units of the input.

    Args:
        row_ownerships: pandas Series of ownership values for one lineup.
            NaN entries are ignored. The values are divided by 100 here
            regardless of the scale the caller uses.

    Returns:
        float: The weighted ownership score, or NaN when the row holds no
        non-NaN value (previously this case raised ``ZeroDivisionError``).
    """
    ownerships = row_ownerships.dropna() / 100
    count = len(ownerships)

    # Nothing to aggregate: report "no value" instead of dividing by zero.
    if count == 0:
        return np.nan

    row_mean = ownerships.mean()

    # Average each value with the row mean, then average those results.
    avg_of_means = ((ownerships + row_mean) / 2).mean()

    # Scale by the number of values and penalise the spread of the row.
    weighted = avg_of_means * count - (ownerships.max() - ownerships.min())

    return weighted * 10000
|
|
|
def calculate_player_similarity_score(portfolio, player_columns):
    """Score how different each lineup is from every other lineup.

    Each row is treated as the set of non-blank string values found in
    ``player_columns``. The score of a row is its mean Jaccard distance to
    all other rows, min-max normalised to ``[0, 1]`` when the scores are not
    all equal. Higher scores indicate more unique lineups.

    Args:
        portfolio: DataFrame containing the lineups, one per row.
        player_columns: List of column names holding player names.

    Returns:
        numpy.ndarray: One float score per row, in row order. Portfolios
        with fewer than two rows yield all zeros, since there is nothing to
        compare against (previously one row produced NaN and zero rows
        raised ``ValueError``).
    """
    n_rows = len(portfolio)
    if n_rows <= 1:
        return np.zeros(n_rows)

    cells = portfolio[player_columns].to_numpy(dtype=object)

    # Assign each distinct player a column id while recording which
    # (row, player) pairs occur. Non-strings (NaN/None) and blank strings
    # are not players.
    player_to_id = {}
    row_idx = []
    col_idx = []
    for i, row in enumerate(cells):
        for val in row:
            if isinstance(val, str) and val.strip() != '':
                row_idx.append(i)
                col_idx.append(player_to_id.setdefault(val, len(player_to_id)))

    # int32 rather than int8: the matrix product below keeps the input
    # dtype, and int8 would wrap once a pair shares more than 127 players.
    binary_matrix = np.zeros((n_rows, len(player_to_id)), dtype=np.int32)
    binary_matrix[np.asarray(row_idx, dtype=np.intp),
                  np.asarray(col_idx, dtype=np.intp)] = 1

    # |A & B| for every pair of rows, and |A | B| = |A| + |B| - |A & B|.
    intersection_matrix = binary_matrix @ binary_matrix.T
    row_sums = binary_matrix.sum(axis=1)
    union_matrix = row_sums[:, np.newaxis] + row_sums - intersection_matrix

    # Pairs of empty lineups have an empty union; their similarity stays 0.
    jaccard_similarity = np.divide(
        intersection_matrix,
        union_matrix,
        out=np.zeros(intersection_matrix.shape, dtype=float),
        where=union_matrix != 0,
    )
    jaccard_distance = 1 - jaccard_similarity

    # A row is not compared with itself.
    np.fill_diagonal(jaccard_distance, 0)
    similarity_scores = jaccard_distance.sum(axis=1) / (n_rows - 1)

    lowest = similarity_scores.min()
    highest = similarity_scores.max()
    if highest > lowest:
        similarity_scores = (similarity_scores - lowest) / (highest - lowest)

    return similarity_scores
|
|
|
def _showdown_ownership(portfolio, maps_dict, n_flex):
    """Build ownership and percent-rank frames for a captain + ``n_flex`` FLEX layout."""
    own_map = maps_dict['own_map']
    cpt_own = portfolio.iloc[:, 0].map(maps_dict['cpt_own_map'])
    flex_owns = [portfolio.iloc[:, pos].map(own_map) for pos in range(1, n_flex + 1)]

    # FLEX ownerships are ranked against the pooled values of every FLEX
    # slot. The pooled series is the slots stacked in order, so slot k's
    # ranks are simply the k-th block of n_rows entries. Tied values share
    # one (average) rank, so this equals a lookup by value, without the
    # O(n^2) scan and without failing on players missing from the map
    # (their rank is NaN).
    pooled_rank = pd.concat(flex_owns).rank(pct=True).to_numpy()
    n_rows = len(portfolio)

    ranks = {'CPT_Own_percent_rank': cpt_own.rank(pct=True).to_numpy()}
    own = {'CPT_Own': (cpt_own / 100).to_numpy()}
    for slot, flex_own in enumerate(flex_owns, start=1):
        ranks[f'FLEX{slot}_Own_percent_rank'] = pooled_rank[(slot - 1) * n_rows:slot * n_rows]
        own[f'FLEX{slot}_Own'] = (flex_own / 100).to_numpy()

    return (pd.DataFrame(own, index=portfolio.index),
            pd.DataFrame(ranks, index=portfolio.index))


def _classic_ownership(portfolio, maps_dict, num_players):
    """Build ownership and percent-rank frames for the first ``num_players`` columns."""
    own = {}
    ranks = {}
    for slot in range(1, num_players + 1):
        names = portfolio.iloc[:, slot - 1]
        ranks[f'player_{slot}_percent_rank'] = names.map(maps_dict['own_percent_rank']).to_numpy()
        own[f'player_{slot}_own'] = (names.map(maps_dict['own_map']) / 100).to_numpy()

    return (pd.DataFrame(own, index=portfolio.index),
            pd.DataFrame(ranks, index=portfolio.index))


def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var):
    """Add duplication, finish, win and uniqueness metrics to a lineup portfolio.

    Args:
        portfolio: DataFrame with one lineup per row. The leading columns
            hold player names (they are read by position) and the frame
            must also provide ``'salary'``, ``'median'`` and ``'Own'``.
            The input frame is not modified.
        maps_dict: Dict of player-name lookups. ``'own_map'`` is always
            used; ``'cpt_own_map'`` is used for captain layouts and
            ``'own_percent_rank'`` for classic layouts.
        site_var: ``'Fanduel'`` or ``'Draftkings'``.
        type_var: ``'Showdown'`` or ``'Classic'``.
        Contest_Size: Number of entries in the contest (must be non-zero).
        strength_var: ``'Weak'``, ``'Average'`` or ``'Sharp'``; selects the
            multipliers applied to the dupe estimate and finish percentile.
        sport_var: Sport identifier. ``'CS2'`` on Draftkings Classic uses
            the captain + 5 FLEX layout, and ``'CS2'`` with ``'Classic'``
            uses a larger ownership-ratio offset.

    Returns:
        DataFrame: A copy of ``portfolio`` with the columns ``'Dupes'``,
        ``'Finish_percentile'``, ``'Win%'``, ``'Lineup Edge'``,
        ``'Weighted Own'``, ``'Geomean'`` and ``'Similarity Score'`` added.

    Raises:
        ValueError: If ``strength_var``, ``site_var`` or ``type_var`` is
            not one of the supported values, or ``own_map`` is empty.
    """
    # (dupes multiplier, finish-percentile multiplier) per field strength.
    strength_multipliers = {
        'Weak': (.75, .90),
        'Average': (1.00, 1.00),
        'Sharp': (1.25, 1.10),
    }
    # Reference salary used in the dupe formula (presumably the site's
    # salary cap -- confirm with the product owner).
    salary_reference = {'Fanduel': 60000, 'Draftkings': 50000}
    non_player_columns = ['salary', 'median', 'Own']

    # Fail early with a clear message instead of an UnboundLocalError later.
    if strength_var not in strength_multipliers:
        raise ValueError(f"Unsupported strength_var: {strength_var!r}")
    if site_var not in salary_reference:
        raise ValueError(f"Unsupported site_var: {site_var!r}")
    if type_var not in ('Showdown', 'Classic'):
        raise ValueError(f"Unsupported type_var: {type_var!r}")

    dupes_multiplier, percentile_multiplier = strength_multipliers[strength_var]
    salary_cap = salary_reference[site_var]
    max_ownership = max(maps_dict['own_map'].values()) / 100

    # Work on a copy so the caller's frame does not pick up new columns;
    # extra columns would be miscounted as players on a later Classic call.
    portfolio = portfolio.copy()

    # Captain layouts: every Showdown contest, plus Draftkings Classic CS2.
    captain_layout = type_var == 'Showdown' or (site_var == 'Draftkings' and sport_var == 'CS2')
    if captain_layout:
        n_flex = 4 if site_var == 'Fanduel' else 5
        player_columns = [col for col in portfolio.columns[:n_flex + 1]
                          if col not in non_player_columns]
        own, ranks = _showdown_ownership(portfolio, maps_dict, n_flex)
    else:
        num_players = len([col for col in portfolio.columns if col not in non_player_columns])
        player_columns = [col for col in portfolio.columns[:num_players]
                          if col not in non_player_columns]
        own, ranks = _classic_ownership(portfolio, maps_dict, num_players)

    # Ownership aggregates per lineup.
    own_product = own.product(axis=1)
    own_average = (portfolio['Own'].max() * .33) / 100
    own_sum = own.sum(axis=1)
    avg_own_rank = ranks.mean(axis=1)

    # Heuristic dupe estimate; the expression is kept term-for-term.
    dupes_calc = (
        (own_product * avg_own_rank) * Contest_Size
        + ((portfolio['salary'] - (salary_cap - portfolio['Own'])) / 100)
        - ((salary_cap - portfolio['salary']) / 100)
    )
    dupes_calc = dupes_calc * dupes_multiplier
    rounded_dupes = np.round(dupes_calc, 0)
    # The lineup itself is not a duplicate, hence the "- 1"; never negative.
    dupes = np.where(rounded_dupes <= 0, 0, rounded_dupes - 1)

    # Ownership ratio: lineups without the highest-owned player have that
    # player's ownership subtracted from their sum.
    contains_max = own.isin([max_ownership]).any(axis=1)
    own_ratio = (own_sum / own_average).where(
        contains_max, (own_sum - max_ownership) / own_average
    )

    percentile_cut_scalar = portfolio['median'].max()
    own_ratio_nerf = 2 if (type_var == 'Classic' and sport_var == 'CS2') else 1.5
    finish_percentile = (
        (own_ratio - own_ratio_nerf)
        / ((10 * (portfolio['median'] / percentile_cut_scalar)) / 2)
    ).clip(lower=.0005)

    # Win% relative to the best median projection, capped at 5x a random entry.
    ref_proj = portfolio['median'].max()
    avg_ref = ((ref_proj + 10) + (ref_proj - 10)) / 2
    win_pct = (
        ((portfolio['median'] / avg_ref) - (0.1 + ((ref_proj - portfolio['median']) / 100)))
        / (Contest_Size / 1000)
    ) / 10
    max_allowed_win = (1 / Contest_Size) * 5
    win_pct = win_pct / win_pct.max() * max_allowed_win

    finish_percentile = finish_percentile + .005 + (.005 * (Contest_Size / 10000))
    finish_percentile = finish_percentile * percentile_multiplier
    # Win% uses the finish percentile *before* the low-ownership adjustment.
    win_pct = win_pct * (1 - finish_percentile)

    # Divide by the number of sub-10% players; 0 such players means no change.
    low_own_count = (own < 0.10).sum(axis=1)
    finish_percentile = finish_percentile / low_own_count.clip(lower=1)

    lineup_edge = win_pct * ((.5 - finish_percentile) * (Contest_Size / 2.5))
    # Split the edge across duplicates; rows with no (or NaN) dupes are untouched.
    lineup_edge = lineup_edge / np.where(dupes > 0, dupes + 1, 1)
    lineup_edge = lineup_edge - lineup_edge.mean()

    portfolio['Dupes'] = dupes
    portfolio['Finish_percentile'] = finish_percentile
    portfolio['Win%'] = win_pct
    portfolio['Lineup Edge'] = lineup_edge
    portfolio['Weighted Own'] = own.apply(calculate_weighted_ownership, axis=1)
    portfolio['Geomean'] = np.power((own * 100).product(axis=1), 1 / own.shape[1])
    portfolio['Similarity Score'] = calculate_player_similarity_score(portfolio, player_columns)

    return portfolio