DFS_Contest_Analyzer / global_func /find_name_mismatches.py
James McCool
Refactor find_name_mismatches function in find_name_mismatches.py
5336bfc
import streamlit as st
from fuzzywuzzy import process
def _rename_player(contest_df, ownership_df, fpts_df, name_columns, old_name, new_name):
    """Replace ``old_name`` with ``new_name`` wherever player names are stored.

    Reassigns every column in ``name_columns`` of ``contest_df`` and the
    'Player' column of ``ownership_df`` and ``fpts_df``, so the frames passed
    in are modified. Also echoes the pair to the Streamlit page.
    """
    for col in name_columns:
        contest_df[col] = contest_df[col].replace(old_name, new_name)
    ownership_df['Player'] = ownership_df['Player'].replace(old_name, new_name)
    fpts_df['Player'] = fpts_df['Player'].replace(old_name, new_name)
    st.write(old_name + ' ' + new_name)


def find_name_mismatches(contest_df, projections_df, ownership_df, fpts_df,
                         auto_match_threshold=95):
    """Align player names in the contest data with the projection names.

    Every projection name that does not appear verbatim in the contest frame
    is fuzzy-matched against the contest names. Matches scoring at least
    ``auto_match_threshold`` are accepted automatically; the rest are shown
    in a Streamlit form where the user picks one of the top three candidates
    (or "None of these"). Accepted contest names are then rewritten to the
    projection spelling in ``contest_df``, ``ownership_df`` and ``fpts_df``.

    Args:
        contest_df: Frame whose columns, other than 'BaseName' and
            'EntryCount', are treated as player-name columns.
        projections_df: Frame that must contain a 'player_names' column.
            It is never modified here.
        ownership_df: Frame with a 'Player' column to rename in step.
        fpts_df: Frame with a 'Player' column to rename in step.
        auto_match_threshold: Minimum fuzzy score (0-100) for a match to be
            applied without asking the user. Defaults to 95.

    Returns:
        The tuple ``(contest_df, projections_df, ownership_df, fpts_df)``.
        The frames are returned unchanged when the 'player_names' column is
        missing, or while the manual-matching form is displayed but has not
        been submitted yet. Renames are applied to the passed-in frames
        themselves (columns are reassigned), not to copies.
    """
    name_columns = [col for col in contest_df.columns if col not in ('BaseName', 'EntryCount')]

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        # Same 4-tuple shape as every other exit so callers can always unpack.
        return contest_df, projections_df, ownership_df, fpts_df

    # Get unique player names from the contest (portfolio) and projections
    portfolio_players = set()
    for col in name_columns:
        portfolio_players.update(contest_df[col].unique())
    projection_players = set(projections_df['player_names'].unique())

    # Only strings can be fuzzy-matched; drop anything else (e.g. missing
    # values in the contest columns) from the candidate list.
    candidate_names = [name for name in portfolio_players if isinstance(name, str)]

    # Projection names that have no exact counterpart in the contest data
    players_missing_from_contest = list(projection_players - portfolio_players)

    # Automatically accept high-confidence matches before the interactive step
    auto_matches = {}
    players_to_process = []
    for player in players_missing_from_contest:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue
        closest_matches = process.extract(player, candidate_names, limit=1)
        # closest_matches is empty when there are no candidates at all.
        if closest_matches and closest_matches[0][1] >= auto_match_threshold:
            match_name = closest_matches[0][0]
            auto_matches[player] = match_name
            st.success(f"Automatically matched '{player}' with '{match_name}' ({closest_matches[0][1]}% match)")
        else:
            players_to_process.append(player)

    if not players_to_process:
        st.success("All players have been automatically matched!")
        # Apply automatic matches
        for projection_name, contest_name in auto_matches.items():
            _rename_player(contest_df, ownership_df, fpts_df, name_columns,
                           contest_name, projection_name)
        return contest_df, projections_df, ownership_df, fpts_df

    st.warning(f"Found {len(players_to_process)} players that need manual matching")

    # One form so that all selections are submitted together
    with st.form("name_matching_form"):
        # One tab per player that needs a manual decision
        tabs = st.tabs([f"Player {i+1}" for i in range(len(players_to_process))])

        # projection name -> radio label chosen by the user
        selections = {}

        for tab, player in zip(tabs, players_to_process):
            with tab:
                st.write(f"**Projection Name:** {player}")

                # Offer the top 3 closest contest names
                closest_matches = process.extract(player, candidate_names, limit=3)
                options = [f"{match[0]} ({match[1]}%)" for match in closest_matches]
                options.append("None of these")

                selections[player] = st.radio(
                    f"Select correct match:",
                    options,
                    key=f"radio_{player}"
                )

        # Submit button for the entire form
        submitted = st.form_submit_button("Apply All Changes")

        if submitted:
            # Apply automatic matches
            for projection_name, contest_name in auto_matches.items():
                _rename_player(contest_df, ownership_df, fpts_df, name_columns,
                               contest_name, projection_name)

            # Apply manual selections
            for projection_name, selection in selections.items():
                if selection == "None of these":
                    continue
                # Strip only the trailing " (NN%)" suffix so names that
                # themselves contain " (" are recovered intact.
                selected_name = selection.rsplit(" (", 1)[0]
                _rename_player(contest_df, ownership_df, fpts_df, name_columns,
                               selected_name, projection_name)
                st.success(f"Replaced '{selected_name}' with '{projection_name}'")
            st.success("All changes applied successfully!")

    # Reached both after a submit (frames updated) and while the form is
    # still waiting for input (frames untouched); never fall through to None.
    return contest_df, projections_df, ownership_df, fpts_df