import streamlit as st
import numpy as np
import pandas as pd
from fuzzywuzzy import process


def find_csv_mismatches(csv_df, projections_df):
    """Reconcile the 'Name' column of a CSV frame with projection player names.

    Names that appear in ``csv_df['Name']`` but not in
    ``projections_df['player_names']`` are fuzzy-matched against the
    projection names. A 100% score is applied automatically; every other
    name is queued in ``st.session_state`` and resolved one at a time through
    a radio/button prompt, with a Streamlit rerun after each confirmation.

    Args:
        csv_df: DataFrame expected to contain a 'Name' column. It is copied,
            so the caller's frame is never modified.
        projections_df: DataFrame expected to contain a 'player_names'
            column. It is only read.

    Returns:
        The copy of ``csv_df`` with any replacements made during this run.
        If either required column is missing, an error is shown and the
        unmodified copy is returned.

    Side effects:
        Reads and writes ``st.session_state.csv_current_player_index`` and
        ``st.session_state.csv_players_to_process``; stores the updated frame
        under ``st.session_state['csv_file']`` when the user confirms a match.
    """
    # Work on a copy so the caller's frame is left untouched.
    csv_df = csv_df.copy()

    if 'Name' not in csv_df.columns:
        st.error("No 'Name' column found in CSV file")
        return csv_df
    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return csv_df

    # Unique, non-null names on both sides.
    csv_players = set(csv_df['Name'].dropna().unique())
    projection_players = set(projections_df['player_names'].dropna().unique())

    # Fuzzy-match candidates are the projection names. Only strings are kept
    # (the matcher works on text), sorted so ties resolve the same way on
    # every rerun.
    projection_players_list = sorted(
        name for name in projection_players if isinstance(name, str)
    )

    # CSV names with no exact counterpart in the projections.
    players_missing_from_projections = sorted(
        csv_players - projection_players, key=str
    )

    # Apply perfect fuzzy matches up front; queue the rest for manual review.
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue

        # Skip the matcher when there is nothing to match against, so the
        # [0] lookup below can never hit an empty result.
        closest_matches = (
            process.extract(player, projection_players_list, limit=1)
            if projection_players_list
            else []
        )
        if closest_matches and closest_matches[0][1] == 100:
            match_name = closest_matches[0][0]
            # Rename the CSV entry to the projection spelling.
            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
        else:
            players_to_process.append(player)

    # The review queue is created once and then survives Streamlit reruns.
    if 'csv_current_player_index' not in st.session_state:
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = players_to_process

    if not players_missing_from_projections:
        st.success("All CSV players found in projections!")
        return csv_df

    st.warning("Players in CSV but missing from projections")

    queue = st.session_state.csv_players_to_process
    position = st.session_state.csv_current_player_index

    remaining_players = queue[position:]
    st.info(
        f"Remaining players to process ({len(remaining_players)}):\n"
        + "\n".join(f"- {player}" for player in remaining_players)
    )

    if position >= len(queue):
        st.success("All players have been processed!")
        # Reset the index for future runs
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = []
        return csv_df

    current_player = queue[position]

    # Offer the three closest projection names.
    closest_matches = (
        process.extract(current_player, projection_players_list, limit=3)
        if projection_players_list
        else []
    )

    st.write(f"**Missing Player {position + 1} of {len(queue)}:** {current_player}")

    # Map each displayed label back to its name, so names that themselves
    # contain " (" are not truncated by string splitting.
    label_to_name = {f"{match[0]} ({match[1]}%)": match[0] for match in closest_matches}
    options = list(label_to_name)
    options.append("None of these")

    selected_option = st.radio(
        "Select correct match:",
        options,
        key=f"csv_radio_{current_player}"
    )

    if st.button("Confirm Selection", key="csv_confirm"):
        # "None of these" is not in the mapping and yields None.
        selected_name = label_to_name.get(selected_option)
        if selected_name is not None:
            # Update CSV DataFrame
            csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
            st.success(f"Replaced '{current_player}' with '{selected_name}'")
            st.session_state['csv_file'] = csv_df

        # Move to next player
        st.session_state.csv_current_player_index += 1
        st.rerun()

    return csv_df