import streamlit as st
import numpy as np
import pandas as pd
import time
from fuzzywuzzy import process

# Columns of the contest dataframe that never hold player names.
_NON_PLAYER_COLUMNS = ('BaseName', 'EntryCount')


def _rename_contest_player(contest_df, name_columns, ownership_dict, fpts_dict,
                           contest_name, projection_name):
    """Rename ``contest_name`` to ``projection_name`` in the frame and both dicts (in place)."""
    for col in name_columns:
        contest_df[col] = contest_df[col].replace(contest_name, projection_name)
    for lookup in (ownership_dict, fpts_dict):
        if contest_name in lookup:
            lookup[projection_name] = lookup.pop(contest_name)


def find_name_mismatches(contest_df, projections_df, ownership_dict, fpts_dict,
                         *, auto_match_threshold=90):
    """Reconcile contest player names with the names used in the projections.

    Every name in ``projections_df['player_names']`` that does not appear in
    any player column of ``contest_df`` is fuzzy-matched against the contest
    names. Matches scoring at least ``auto_match_threshold`` are accepted
    automatically; the rest are shown in a Streamlit form where the user picks
    one of the top three suggestions or "None of these".

    When a match is applied, the contest name is replaced by the projection
    name in every player column of the contest frame, and the matching keys of
    the ownership and fantasy-points dicts are renamed the same way.

    Args:
        contest_df: Contest dataframe. Every column except ``BaseName`` and
            ``EntryCount`` is treated as a player-name column.
        projections_df: Projections dataframe; must contain ``player_names``.
        ownership_dict: Mapping keyed by player name.
        fpts_dict: Mapping keyed by player name.
        auto_match_threshold: Minimum fuzzy score (0-100) for a match to be
            accepted without user confirmation.

    Returns:
        ``(contest_df, projections_df, ownership_dict, fpts_dict)``. All four
        are copies; the inputs are never modified. The same 4-tuple shape is
        returned on every path, including the missing-column error path and
        the form paths. While the form is displayed but not yet submitted,
        the copies are returned with no renames applied.
    """
    # Work on copies so the caller's objects are left untouched.
    projections_df = projections_df.copy()
    contest_df = contest_df.copy()
    ownership_dict = ownership_dict.copy()
    fpts_dict = fpts_dict.copy()

    name_columns = [col for col in contest_df.columns if col not in _NON_PLAYER_COLUMNS]

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return contest_df, projections_df, ownership_dict, fpts_dict

    # Collect the unique names on each side.
    portfolio_players = set()
    for col in name_columns:
        portfolio_players.update(contest_df[col].unique())
    projection_players = set(projections_df['player_names'].unique())

    # Only real strings are usable fuzzy-match candidates (empty cells come
    # through as NaN). Sorting keeps suggestion order stable between runs.
    candidates = sorted(name for name in portfolio_players if isinstance(name, str))

    # Projection names that do not appear verbatim anywhere in the contest.
    unmatched_projection_names = sorted(projection_players - portfolio_players, key=str)

    # Accept high-confidence matches automatically; queue the rest for review.
    auto_matches = {}
    players_to_process = []
    suggestions = {}
    for player in unmatched_projection_names:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue
        # One top-3 search serves both the auto-match check and the manual
        # form. With no candidates there is nothing to search.
        closest_matches = process.extract(player, candidates, limit=3) if candidates else []
        if closest_matches and closest_matches[0][1] >= auto_match_threshold:
            match_name, score = closest_matches[0][0], closest_matches[0][1]
            auto_matches[player] = match_name
            st.success(f"Automatically matched '{player}' with '{match_name}' ({score}% match)")
        else:
            suggestions[player] = closest_matches
            players_to_process.append(player)

    if not players_to_process:
        st.success("All players have been automatically matched!")
        for projection_name, contest_name in auto_matches.items():
            _rename_contest_player(contest_df, name_columns, ownership_dict, fpts_dict,
                                   contest_name, projection_name)
        return contest_df, projections_df, ownership_dict, fpts_dict

    st.warning(f"Found {len(players_to_process)} players that need manual matching")

    # One form for the whole batch, with one tab per unresolved player.
    with st.form("name_matching_form"):
        tabs = st.tabs([f"Player {i}" for i in range(1, len(players_to_process) + 1)])

        selections = {}
        for tab, player in zip(tabs, players_to_process):
            with tab:
                st.write(f"**Projection Name:** {player}")
                options = [f"{match[0]} ({match[1]}%)" for match in suggestions[player]]
                options.append("None of these")
                selections[player] = st.radio(
                    "Select correct match:",
                    options,
                    key=f"radio_{player}"
                )

        submitted = st.form_submit_button("Apply All Changes")

        if submitted:
            # Automatic matches are applied only once the user confirms.
            for projection_name, contest_name in auto_matches.items():
                _rename_contest_player(contest_df, name_columns, ownership_dict, fpts_dict,
                                       contest_name, projection_name)

            for projection_name, selection in selections.items():
                if selection == "None of these":
                    continue
                # Strip only the trailing " (score%)" suffix so names that
                # themselves contain " (" are kept intact.
                selected_name = selection.rsplit(" (", 1)[0]
                _rename_contest_player(contest_df, name_columns, ownership_dict, fpts_dict,
                                       selected_name, projection_name)
                st.success(f"Replaced '{selected_name}' with '{projection_name}'")

            st.success("All changes applied successfully!")
            return contest_df, projections_df, ownership_dict, fpts_dict

    # Form shown but not submitted yet: hand back the unmodified copies.
    return contest_df, projections_df, ownership_dict, fpts_dict