James McCool
committed on
Commit
·
a35b524
1
Parent(s):
84c79dc
Initial Commit
Browse files- global_func/clean_player_name.py +16 -0
- global_func/find_csv_mismatches.py +93 -0
- global_func/find_name_mismatches.py +92 -0
- global_func/highlight_rows.py +29 -0
- global_func/load_csv.py +24 -0
- global_func/load_file.py +31 -0
- global_func/load_ss_file.py +34 -0
- global_func/optimize_lineup.py +74 -0
- global_func/predict_dupes.py +188 -0
global_func/clean_player_name.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
from fuzzywuzzy import process
|
6 |
+
|
7 |
+
def clean_player_name(name):
    """Normalize a raw player-name string from an uploaded file.

    Strips two common site-specific decorations:

    - ``"POS: Player Name"``  -> everything up to the first colon is dropped.
    - ``"Player Name (note)"`` -> everything from the first ``(`` on is dropped.

    Parameters
    ----------
    name : str
        Raw player name as it appears in the uploaded CSV/Excel file.

    Returns
    -------
    str
        The cleaned, whitespace-trimmed player name.
    """
    # Handle colon case first (remove everything before the FIRST colon).
    # maxsplit=1 fixes a truncation bug: the original split(':')[1] would
    # reduce "A: B: C" to just "B", silently losing the tail of the name.
    if ':' in name:
        name = name.split(':', 1)[1].strip()

    # Handle parentheses case (remove everything after the opening parenthesis)
    if '(' in name:
        name = name.split('(')[0].strip()

    return name
|
global_func/find_csv_mismatches.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
from fuzzywuzzy import process
|
5 |
+
|
6 |
+
def find_csv_mismatches(csv_df, projections_df):
    """Interactively reconcile player names between a site CSV and projections.

    Auto-renames CSV players that fuzzy-match a projection name at 100%,
    then walks the user through the remaining mismatches one at a time via
    Streamlit widgets (radio + confirm button, advancing with st.rerun()).
    Progress is tracked in st.session_state across reruns.

    Parameters
    ----------
    csv_df : pandas.DataFrame
        Site export; must contain a 'Name' column.
    projections_df : pandas.DataFrame
        Projections; must contain a 'player_names' column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``csv_df`` with any accepted name replacements applied.
    """
    # Create copies of the dataframes to avoid modifying the originals
    csv_df = csv_df.copy()
    projections_df = projections_df.copy()

    # Guard clauses: bail out (returning the untouched copy) if either
    # frame lacks its expected name column.
    if 'Name' not in csv_df.columns:
        st.error("No 'Name' column found in CSV file")
        return csv_df

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return csv_df

    # Get unique player names from CSV and projections
    csv_players = set(csv_df['Name'].dropna().unique())
    projection_players = set(projections_df['player_names'].unique())
    # NOTE(review): despite its name, this list holds the CSV names — it is
    # the candidate pool that fuzzy matching searches below. Confirm this
    # swap is intentional (mismatched projection names get matched AGAINST
    # the CSV names).
    projection_players_list = list(csv_players)

    # NOTE(review): the comment below (kept from the original) says "in CSV
    # but missing from projections", yet the set difference computes the
    # opposite direction (in projections, missing from CSV). The warning
    # message further down repeats the comment's wording — verify intent.
    # Find players in CSV that are missing from projections
    players_missing_from_projections = list(projection_players - csv_players)

    # Automatically handle 100% matches before starting interactive process
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue
        closest_matches = process.extract(player, projection_players_list, limit=1)
        if closest_matches[0][1] == 100:  # If perfect match found
            match_name = closest_matches[0][0]
            # Update CSV DataFrame to use the projection name
            # NOTE(review): 'player' comes from the projections side here, so
            # rows with Name == player may not exist in csv_df — confirm.
            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
        else:
            players_to_process.append(player)

    # Initialize session state for tracking current player if not exists.
    # The worklist is only captured on the FIRST run; later reruns keep the
    # stored list so the confirm/rerun loop can walk through it.
    if 'csv_current_player_index' not in st.session_state:
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = players_to_process

    # Display results
    if players_missing_from_projections:
        st.warning("Players in CSV but missing from projections")

        # Display remaining players
        remaining_players = st.session_state.csv_players_to_process[st.session_state.csv_current_player_index:]
        st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
                "\n".join(f"- {player}" for player in remaining_players))

        if st.session_state.csv_current_player_index < len(st.session_state.csv_players_to_process):
            current_player = st.session_state.csv_players_to_process[st.session_state.csv_current_player_index]

            # Find the top 3 closest matches
            closest_matches = process.extract(current_player, projection_players_list, limit=3)

            st.write(f"**Missing Player {st.session_state.csv_current_player_index + 1} of {len(st.session_state.csv_players_to_process)}:** {current_player}")

            # Create radio buttons for selection
            options = [f"{match[0]} ({match[1]}%)" for match in closest_matches]
            options.append("None of these")

            selected_option = st.radio(
                f"Select correct match:",
                options,
                key=f"csv_radio_{current_player}"
            )

            if st.button("Confirm Selection", key="csv_confirm"):
                if selected_option != "None of these":
                    # The option label is "Name (score%)"; strip the score.
                    selected_name = selected_option.split(" (")[0]
                    # Update CSV DataFrame
                    csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
                    st.success(f"Replaced '{current_player}' with '{selected_name}'")
                    # Persist the updated frame so it survives the rerun.
                    st.session_state['csv_file'] = csv_df

                # Move to next player (also advances on "None of these")
                st.session_state.csv_current_player_index += 1
                st.rerun()
        else:
            st.success("All players have been processed!")
            # Reset the index for future runs
            st.session_state.csv_current_player_index = 0
            st.session_state.csv_players_to_process = []
    else:
        st.success("All CSV players found in projections!")

    return csv_df
|
global_func/find_name_mismatches.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
from fuzzywuzzy import process
|
6 |
+
|
7 |
+
def find_name_mismatches(portfolio_df, projections_df):
    """Find and handle name mismatches between portfolio and projections dataframes.

    Portfolio player names are treated as canonical: projection rows whose
    'player_names' fuzzy-match a missing portfolio name at 100% are renamed
    automatically; the rest are resolved by the user through a Streamlit
    form (one tab per unresolved player).

    Parameters
    ----------
    portfolio_df : pandas.DataFrame
        Lineups; every column except the known stat columns is assumed to
        hold player names.
    projections_df : pandas.DataFrame
        Projections; must contain a 'player_names' column. Renamed in place.

    Returns
    -------
    pandas.DataFrame
        The updated projections dataframe with matched names.
    """
    # Get all player names from portfolio (skip the stat columns)
    portfolio_players = set()
    for col in portfolio_df.columns:
        if col not in ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Stack', 'Win%', 'Lineup Edge']:
            portfolio_players.update(portfolio_df[col].unique())

    # Get all player names from projections
    projection_players_list = projections_df['player_names'].tolist()

    # Find players in portfolio that are missing from projections
    players_missing_from_projections = [player for player in portfolio_players if player not in projection_players_list]

    # Automatically handle 100% matches before starting interactive process
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue
        closest_matches = process.extract(player, projection_players_list, limit=1)
        # BUGFIX: this previously tested `== 90`, so genuine 100% matches were
        # NOT auto-applied (and only an exact score of 90 was, while being
        # reported as a "100% match"). Now consistent with find_csv_mismatches.
        if closest_matches[0][1] == 100:  # If perfect match found
            match_name = closest_matches[0][0]
            # Rename the projection row to the portfolio's spelling.
            projections_df.loc[projections_df['player_names'] == match_name, 'player_names'] = player
            st.success(f"Automatically matched '{match_name}' with '{player}' (100% match)")
        else:
            players_to_process.append(player)

    # Display results
    if players_missing_from_projections:
        st.warning("Players in portfolio but missing from projections")

        # Display remaining players
        if players_to_process:
            st.info(f"Players to process ({len(players_to_process)}):\n" +
                    "\n".join(f"- {player}" for player in players_to_process))

            # Create a form for batch processing; selections are applied all
            # at once on submit rather than one rerun per player.
            with st.form("player_matching_form"):
                # Create tabs for each player
                tabs = st.tabs([f"Player {i+1}" for i in range(len(players_to_process))])

                # Dictionary to store selections
                selections = {}

                # Process each tab
                for idx, (tab, player) in enumerate(zip(tabs, players_to_process)):
                    with tab:
                        st.write(f"**Missing Player {idx + 1} of {len(players_to_process)}:** {player}")

                        # Find the top 3 closest matches
                        closest_matches = process.extract(player, projection_players_list, limit=3)

                        # Create radio buttons for selection
                        options = [f"{match[0]} ({match[1]}%)" for match in closest_matches]
                        options.append("None of these")

                        selected_option = st.radio(
                            f"Select correct match for {player}:",
                            options,
                            key=f"radio_{player}"
                        )

                        selections[player] = selected_option

                # Submit button for the entire form
                submitted = st.form_submit_button("Submit All Changes")

                if submitted:
                    # Process all selections
                    for player, selection in selections.items():
                        if selection != "None of these":
                            # The option label is "Name (score%)"; strip the score.
                            selected_name = selection.split(" (")[0]
                            projections_df.loc[projections_df['player_names'] == selected_name, 'player_names'] = player
                            st.success(f"Replaced '{selected_name}' with '{player}'")

                    # Update session state
                    st.session_state['projections_df'] = projections_df
                    st.success("All player name changes have been applied!")
    else:
        st.success("All portfolio players found in projections!")

    return projections_df
|
global_func/highlight_rows.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
from fuzzywuzzy import process
|
6 |
+
|
7 |
+
def highlight_changes(row):
    """Styler callback: yellow-highlight cells that differ from the saved portfolio.

    Compares ``row`` against the same-index row of
    ``st.session_state['portfolio']`` and returns one CSS string per cell.
    """
    baseline = st.session_state['portfolio'].iloc[row.name]
    return [
        'background-color: yellow' if before != after else ''
        for before, after in zip(baseline, row)
    ]
|
14 |
+
|
15 |
+
def highlight_changes_winners(row):
    """Styler callback: aqua-highlight cells that differ from the medians baseline.

    Compares ``row`` against the same-index row of
    ``st.session_state['optimized_df_medians']`` and returns one CSS string per cell.
    """
    baseline = st.session_state['optimized_df_medians'].iloc[row.name]
    return [
        'background-color: aqua' if before != after else ''
        for before, after in zip(baseline, row)
    ]
|
22 |
+
|
23 |
+
def highlight_changes_losers(row):
    """Styler callback: darksalmon-highlight cells that differ from the winners baseline.

    Compares ``row`` against the same-index row of
    ``st.session_state['optimized_df_winners']`` and returns one CSS string per cell.
    """
    baseline = st.session_state['optimized_df_winners'].iloc[row.name]
    return [
        'background-color: darksalmon' if before != after else ''
        for before, after in zip(baseline, row)
    ]
|
global_func/load_csv.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
from fuzzywuzzy import process
|
6 |
+
|
7 |
+
def load_csv(upload):
    """Load an uploaded CSV into a DataFrame, adding a 'Name + ID' column when possible.

    Parameters
    ----------
    upload : file-like or None
        Uploaded file object (e.g. from st.file_uploader); must expose ``.name``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed DataFrame, or None when nothing was uploaded, the file is
        not a CSV, or parsing failed (an st.error is shown in those cases).
    """
    if upload is None:
        return None
    try:
        # NOTE(review): only .csv is accepted here although the error message
        # mentions Excel; message kept as-is for parity with load_file.
        if not upload.name.endswith('.csv'):
            st.error('Please upload either a CSV or Excel file')
            return None

        df = pd.read_csv(upload)
        try:
            df['Name + ID'] = df['Name'] + ' (' + df['ID'].astype(str) + ')'
        except (KeyError, TypeError):
            # 'Name'/'ID' columns absent or not concatenable; the derived
            # column is optional, so continue without it. (Previously a bare
            # `except` that could hide unrelated programming errors.)
            pass

        return df
    except Exception as e:
        st.error(f'Error loading file: {str(e)}')
        return None
|
global_func/load_file.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
from fuzzywuzzy import process
|
6 |
+
|
7 |
+
## import global functions
|
8 |
+
from global_func.clean_player_name import clean_player_name
|
9 |
+
|
10 |
+
def load_file(upload):
    """Load an uploaded CSV/Excel file and return (raw, cleaned) DataFrames.

    The raw copy is preserved for later export; in the cleaned copy every
    string cell of every object-dtype column is passed through
    ``clean_player_name``.

    Parameters
    ----------
    upload : file-like or None
        Uploaded file object (e.g. from st.file_uploader); must expose ``.name``.

    Returns
    -------
    tuple(pandas.DataFrame, pandas.DataFrame) or tuple(None, None)
        ``(export_df, cleaned_df)`` on success. On any failure returns
        ``(None, None)`` — previously the failure paths returned a bare
        ``None``, which crashed callers unpacking the 2-tuple; this now
        matches load_ss_file's convention.
    """
    if upload is None:
        return None, None
    try:
        if upload.name.endswith('.csv'):
            df = pd.read_csv(upload)
        elif upload.name.endswith(('.xls', '.xlsx')):
            df = pd.read_excel(upload)
        else:
            st.error('Please upload either a CSV or Excel file')
            return None, None

        # Keep an untouched copy before name cleaning mutates df.
        export_df = df.copy()

        for col in df.columns:
            if df[col].dtype == 'object':
                df[col] = df[col].apply(lambda x: clean_player_name(x) if isinstance(x, str) else x)

        return export_df, df
    except Exception as e:
        st.error(f'Error loading file: {str(e)}')
        return None, None
|
global_func/load_ss_file.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
from fuzzywuzzy import process
|
6 |
+
|
7 |
+
def load_ss_file(lineups, csv_file):
    """Load a lineups file and translate its player IDs into display names.

    Builds an ID -> name lookup from ``csv_file`` and maps every cell of the
    uploaded lineups file through it.

    Parameters
    ----------
    lineups : file-like
        Uploaded lineups file (.csv/.xls/.xlsx); must expose ``.name``.
    csv_file : pandas.DataFrame
        Site player export with either 'ID'/'Name' or 'Id'/'Nickname' columns.

    Returns
    -------
    tuple(pandas.DataFrame, pandas.DataFrame) or tuple(None, None)
        ``(export_df, mapped_df)`` — the raw lineups and the name-mapped
        copy — or ``(None, None)`` on failure.
    """
    df = csv_file.copy()
    try:
        # 'ID'/'Name' column pair (presumably the DraftKings-style export —
        # TODO confirm against the upstream site files).
        name_dict = dict(zip(df['ID'], df['Name']))
    except KeyError:
        # Fall back to the 'Id'/'Nickname' pair (presumably FanDuel-style).
        # Narrowed from a bare `except` so that unrelated failures — e.g. a
        # csv_file of the wrong type — are not silently masked.
        name_dict = dict(zip(df['Id'], df['Nickname']))

    # Now load and process the lineups file
    try:
        if lineups.name.endswith('.csv'):
            lineups_df = pd.read_csv(lineups)
        elif lineups.name.endswith(('.xls', '.xlsx')):
            lineups_df = pd.read_excel(lineups)
        else:
            st.error('Please upload either a CSV or Excel file for lineups')
            return None, None

        # Keep the raw (ID-based) frame for export before mapping.
        export_df = lineups_df.copy()

        # Map the IDs to names; unknown IDs become NaN.
        for col in lineups_df.columns:
            lineups_df[col] = lineups_df[col].map(name_dict)

        return export_df, lineups_df

    except Exception as e:
        st.error(f'Error loading lineups file: {str(e)}')
        return None, None
|
global_func/optimize_lineup.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
from fuzzywuzzy import process
|
6 |
+
|
7 |
+
def optimize_lineup(row):
    """Greedily upgrade one lineup, slot by slot, within the salary cap.

    Roster slots are visited in random order; each slot's player is replaced
    by the highest-median eligible candidate that fits the remaining cap,
    is not already in the lineup, covers a valid position for the slot, and
    is not on a removed team.

    Parameters
    ----------
    row : pandas.Series
        One lineup; its index holds roster-slot columns plus the stat
        columns 'salary', 'median', 'Own', 'Finish_percentile', 'Dupes',
        'Lineup Edge' (which are skipped).

    Returns
    -------
    list
        Player names for the roster slots, in the original slot order.

    NOTE(review): this function references names never defined or imported
    in this module — ``map_dict``, ``remove_teams_var``, ``position_rules``
    and ``position_groups`` must be injected into its global scope by the
    calling app before use; confirm how the app wires these up.
    """
    # BUGFIX: `random` was used below but never imported anywhere in this
    # module, so calling the function raised NameError. Local import keeps
    # the module's top-level import block untouched.
    import random

    total_salary = 0
    # NOTE(review): hard-coded cap; other code in this repo uses 60000 for
    # FanDuel — confirm this helper is DraftKings-only.
    salary_cap = 50000
    used_players = set()

    # Convert row to a dict keyed by roster slot, with per-player lookups
    # resolved once up front. (Removed the unused `current_lineup` local.)
    roster = {}
    for col, player in zip(row.index, row):
        if col not in ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Lineup Edge']:
            roster[col] = {
                'name': player,
                'position': map_dict['pos_map'].get(player, '').split('/'),
                'team': map_dict['team_map'].get(player, ''),
                'salary': map_dict['salary_map'].get(player, 0),
                'median': map_dict['proj_map'].get(player, 0),
                'ownership': map_dict['own_map'].get(player, 0)
            }
            total_salary += roster[col]['salary']
            used_players.add(player)

    # Optimize each roster position in random order so no slot is
    # systematically favored across repeated calls.
    roster_positions = list(roster.items())
    random.shuffle(roster_positions)

    for roster_pos, current in roster_positions:
        # Skip optimization for players from removed teams
        if current['team'] in remove_teams_var:
            continue

        valid_positions = position_rules[roster_pos]
        better_options = []

        # Find valid replacements for this roster position
        for pos in valid_positions:
            if pos in position_groups:
                pos_options = [
                    p for p in position_groups[pos]
                    if p['median'] > current['median']
                    and (total_salary - current['salary'] + p['salary']) <= salary_cap
                    and p['player_names'] not in used_players
                    and any(valid_pos in p['positions'] for valid_pos in valid_positions)
                    and map_dict['team_map'].get(p['player_names']) not in remove_teams_var  # Check team restriction
                ]
                better_options.extend(pos_options)

        if better_options:
            # Remove duplicates (a player can surface via several position groups)
            better_options = {opt['player_names']: opt for opt in better_options}.values()

            # Take the candidate with the best median projection
            best_replacement = max(better_options, key=lambda x: x['median'])

            # Update the lineup and tracking variables
            used_players.remove(current['name'])
            used_players.add(best_replacement['player_names'])
            total_salary = total_salary - current['salary'] + best_replacement['salary']
            roster[roster_pos] = {
                'name': best_replacement['player_names'],
                'position': map_dict['pos_map'][best_replacement['player_names']].split('/'),
                'team': map_dict['team_map'][best_replacement['player_names']],
                'salary': best_replacement['salary'],
                'median': best_replacement['median'],
                'ownership': best_replacement['ownership']
            }

    # Return optimized lineup maintaining original column order
    return [roster[pos]['name'] for pos in row.index if pos in roster]
|
global_func/predict_dupes.py
ADDED
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
from fuzzywuzzy import process
|
6 |
+
|
7 |
+
def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var):
    """Annotate a lineup portfolio with duplication / finish / win estimates.

    Adds 'Dupes', 'Finish_percentile', 'Win%' and 'Lineup Edge' columns
    (intermediate calculation columns are dropped before returning).

    Parameters
    ----------
    portfolio : pandas.DataFrame
        One lineup per row: player-name columns first (positional iloc access
        below assumes CPT in column 0 and FLEX in columns 1..N for Showdown),
        followed by stat columns including 'salary', 'median' and 'Own'.
    maps_dict : dict
        Per-player lookup tables; reads 'own_map', 'cpt_own_map' and
        'own_percent_rank', each keyed by player name.
    site_var : str
        'Fanduel' (60000 cap, 5-man Showdown) or 'Draftkings'
        (50000 cap, 6-man Showdown).
    type_var : str
        'Showdown' (CPT/FLEX columns) or 'Classic' (uniform player columns).
    Contest_Size : int
        Entry count of the contest; scales dupes, win% and percentile shifts.
    strength_var : str
        Field strength: 'Weak', 'Average' or 'Sharp'.

    Returns
    -------
    pandas.DataFrame
        The annotated portfolio (also mutated in place).

    NOTE(review): if ``strength_var``, ``site_var`` or ``type_var`` fall
    outside the literal values handled below, the multipliers and the
    ``own_columns`` / ``dup_count_columns`` / ``calc_columns`` names are
    never assigned and the tail of this function raises NameError —
    confirm callers validate these inputs upstream.
    """
    # Field-strength multipliers: sharper fields are assumed to duplicate
    # more lineups and to push finish percentiles slightly upward.
    if strength_var == 'Weak':
        dupes_multiplier = .75
        percentile_multiplier = .90
    elif strength_var == 'Average':
        dupes_multiplier = 1.00
        percentile_multiplier = 1.00
    elif strength_var == 'Sharp':
        dupes_multiplier = 1.25
        percentile_multiplier = 1.10
    # Ownerships in maps_dict are percentages; convert to fractions.
    max_ownership = max(maps_dict['own_map'].values()) / 100
    # NOTE(review): average_ownership is computed but never used below.
    average_ownership = np.mean(list(maps_dict['own_map'].values())) / 100
    if site_var == 'Fanduel':
        if type_var == 'Showdown':
            # FD Showdown: 1 CPT (column 0) + 4 FLEX (columns 1-4).
            dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank']
            own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own']
            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
            # Pool all FLEX ownerships so the percent-ranks share one scale.
            flex_ownerships = pd.concat([
                portfolio.iloc[:,1].map(maps_dict['own_map']),
                portfolio.iloc[:,2].map(maps_dict['own_map']),
                portfolio.iloc[:,3].map(maps_dict['own_map']),
                portfolio.iloc[:,4].map(maps_dict['own_map'])
            ])
            flex_rank = flex_ownerships.rank(pct=True)

            # Assign ranks back to individual columns using the same rank scale
            portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
            portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])

            # Per-slot ownership fractions (CPT uses its own map).
            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100

            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
            # NOTE(review): a scalar (a third of the portfolio-wide max 'Own')
            # broadcast to every row — confirm this is intended rather than a
            # per-row average.
            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)

            # Calculate dupes formula (60000 = FD salary cap)
            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (60000 - portfolio['Own'])) / 100) - ((60000 - portfolio['salary']) / 100)
            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier

            # Round and handle negative values
            portfolio['Dupes'] = np.where(
                np.round(portfolio['dupes_calc'], 0) <= 0,
                0,
                np.round(portfolio['dupes_calc'], 0) - 1
            )
        if type_var == 'Classic':
            # FD Classic: every non-stat column is a player slot.
            num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
            dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
            own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
            for i in range(1, num_players + 1):
                portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100

            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)

            # Same dupes formula as Showdown, same 60000 FD cap.
            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (60000 - portfolio['Own'])) / 100) - ((60000 - portfolio['salary']) / 100)
            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier
            # Round and handle negative values
            portfolio['Dupes'] = np.where(
                np.round(portfolio['dupes_calc'], 0) <= 0,
                0,
                np.round(portfolio['dupes_calc'], 0) - 1
            )

    elif site_var == 'Draftkings':
        if type_var == 'Showdown':
            # DK Showdown: 1 CPT (column 0) + 5 FLEX (columns 1-5).
            dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
            own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
            # Pool all FLEX ownerships so the percent-ranks share one scale.
            flex_ownerships = pd.concat([
                portfolio.iloc[:,1].map(maps_dict['own_map']),
                portfolio.iloc[:,2].map(maps_dict['own_map']),
                portfolio.iloc[:,3].map(maps_dict['own_map']),
                portfolio.iloc[:,4].map(maps_dict['own_map']),
                portfolio.iloc[:,5].map(maps_dict['own_map'])
            ])
            flex_rank = flex_ownerships.rank(pct=True)

            # Assign ranks back to individual columns using the same rank scale
            portfolio['CPT_Own_percent_rank'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']).rank(pct=True)
            portfolio['FLEX1_Own_percent_rank'] = portfolio.iloc[:,1].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX2_Own_percent_rank'] = portfolio.iloc[:,2].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX3_Own_percent_rank'] = portfolio.iloc[:,3].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX4_Own_percent_rank'] = portfolio.iloc[:,4].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])
            portfolio['FLEX5_Own_percent_rank'] = portfolio.iloc[:,5].map(maps_dict['own_map']).map(lambda x: flex_rank[flex_ownerships == x].iloc[0])

            # Per-slot ownership fractions (CPT uses its own map).
            portfolio['CPT_Own'] = portfolio.iloc[:,0].map(maps_dict['cpt_own_map']) / 100
            portfolio['FLEX1_Own'] = portfolio.iloc[:,1].map(maps_dict['own_map']) / 100
            portfolio['FLEX2_Own'] = portfolio.iloc[:,2].map(maps_dict['own_map']) / 100
            portfolio['FLEX3_Own'] = portfolio.iloc[:,3].map(maps_dict['own_map']) / 100
            portfolio['FLEX4_Own'] = portfolio.iloc[:,4].map(maps_dict['own_map']) / 100
            portfolio['FLEX5_Own'] = portfolio.iloc[:,5].map(maps_dict['own_map']) / 100

            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)

            # Calculate dupes formula (50000 = DK salary cap)
            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (50000 - portfolio['Own'])) / 100) - ((50000 - portfolio['salary']) / 100)
            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier

            # Round and handle negative values
            portfolio['Dupes'] = np.where(
                np.round(portfolio['dupes_calc'], 0) <= 0,
                0,
                np.round(portfolio['dupes_calc'], 0) - 1
            )
        if type_var == 'Classic':
            # DK Classic: every non-stat column is a player slot.
            num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
            dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
            own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
            calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
            for i in range(1, num_players + 1):
                portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
                portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100

            portfolio['own_product'] = (portfolio[own_columns].product(axis=1))
            portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
            portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
            portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)

            # Same dupes formula, 50000 DK cap.
            portfolio['dupes_calc'] = (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size + ((portfolio['salary'] - (50000 - portfolio['Own'])) / 100) - ((50000 - portfolio['salary']) / 100)
            portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier
            # Round and handle negative values
            portfolio['Dupes'] = np.where(
                np.round(portfolio['dupes_calc'], 0) <= 0,
                0,
                np.round(portfolio['dupes_calc'], 0) - 1
            )

    portfolio['Dupes'] = np.round(portfolio['Dupes'], 0)
    # own_ratio: lineups already containing the single most-owned player use
    # the full own_sum; others have that max ownership subtracted first.
    portfolio['own_ratio'] = np.where(
        portfolio[own_columns].isin([max_ownership]).any(axis=1),
        portfolio['own_sum'] / portfolio['own_average'],
        (portfolio['own_sum'] - max_ownership) / portfolio['own_average']
    )
    percentile_cut_scalar = portfolio['median'].max()  # Get scalar value
    # Showdown lineups get a smaller ownership penalty offset than Classic.
    if type_var == 'Classic':
        own_ratio_nerf = 2
    elif type_var == 'Showdown':
        own_ratio_nerf = 1.5
    # Finish percentile, floored at .0005 (a top-0.05% finish).
    portfolio['Finish_percentile'] = portfolio.apply(
        lambda row: .0005 if (row['own_ratio'] - own_ratio_nerf) / ((10 * (row['median'] / percentile_cut_scalar)) / 2) < .0005
        else (row['own_ratio'] - own_ratio_nerf) / ((10 * (row['median'] / percentile_cut_scalar)) / 2),
        axis=1
    )

    # Win% estimate anchored at the best median in the portfolio (+/- 10 pts).
    portfolio['Ref_Proj'] = portfolio['median'].max()
    portfolio['Max_Proj'] = portfolio['Ref_Proj'] + 10
    portfolio['Min_Proj'] = portfolio['Ref_Proj'] - 10
    portfolio['Avg_Ref'] = (portfolio['Max_Proj'] + portfolio['Min_Proj']) / 2
    portfolio['Win%'] = (((portfolio['median'] / portfolio['Avg_Ref']) - (0.1 + ((portfolio['Ref_Proj'] - portfolio['median'])/100))) / (Contest_Size / 1000)) / 10
    # Rescale so the best lineup's win odds top out at 5x the uniform chance.
    max_allowed_win = (1 / Contest_Size) * 5
    portfolio['Win%'] = portfolio['Win%'] / portfolio['Win%'].max() * max_allowed_win

    # Nudge percentiles up with contest size, apply field-strength multiplier,
    # then discount Win% by the resulting finish percentile.
    portfolio['Finish_percentile'] = portfolio['Finish_percentile'] + .005 + (.005 * (Contest_Size / 10000))
    portfolio['Finish_percentile'] = portfolio['Finish_percentile'] * percentile_multiplier
    portfolio['Win%'] = portfolio['Win%'] * (1 - portfolio['Finish_percentile'])

    # Reward low-owned (<10%) players: divide the percentile by their count.
    portfolio['low_own_count'] = portfolio[own_columns].apply(lambda row: (row < 0.10).sum(), axis=1)
    portfolio['Finish_percentile'] = portfolio.apply(lambda row: row['Finish_percentile'] if row['low_own_count'] <= 0 else row['Finish_percentile'] / row['low_own_count'], axis=1)
    # Lineup Edge: win odds weighted by distance from a median finish, split
    # across expected duplicates, then centered on the portfolio mean.
    portfolio['Lineup Edge'] = portfolio['Win%'] * ((.5 - portfolio['Finish_percentile']) * (Contest_Size / 2.5))
    portfolio['Lineup Edge'] = portfolio.apply(lambda row: row['Lineup Edge'] / (row['Dupes'] + 1) if row['Dupes'] > 0 else row['Lineup Edge'], axis=1)
    portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
    # Drop all intermediate working columns before returning.
    portfolio = portfolio.drop(columns=dup_count_columns)
    portfolio = portfolio.drop(columns=own_columns)
    portfolio = portfolio.drop(columns=calc_columns)

    return portfolio
|