# DFS_Contest_Analyzer / global_func / find_name_mismatches.py
# Author: James McCool
# Commit d04558f: Add functionality for player name cleaning and CSV mismatch detection
# File size: 4.27 kB
import streamlit as st
import numpy as np
import pandas as pd
import time
from fuzzywuzzy import process
def find_name_mismatches(portfolio_df: pd.DataFrame, projections_df: pd.DataFrame) -> pd.DataFrame:
    """Reconcile player names in a portfolio with the projections' ``player_names``.

    Every distinct value found in any column of ``portfolio_df`` is compared
    with the distinct values of ``projections_df['player_names']``. Portfolio
    names missing from the projections are fuzzy-matched with
    ``fuzzywuzzy.process.extract``:

    * a best match scoring exactly 100 is applied automatically, i.e. the
      projection name is overwritten with the portfolio spelling;
    * every other name is queued and presented one at a time through Streamlit
      widgets (top three candidates plus "None of these").

    Args:
        portfolio_df: Frame whose cells hold player names. It is only read;
            columns are accessed by position, so duplicate column labels are
            fine and the caller's frame is left untouched.
        projections_df: Frame expected to contain a ``player_names`` column.
            A copy is made; the caller's frame is never modified.

    Returns:
        The copied projections frame with any renames made during this call
        applied. If ``player_names`` is missing, an error is shown and the
        unmodified copy is returned.

    Side effects:
        Renders Streamlit messages/widgets and reads/writes
        ``st.session_state`` keys ``current_player_index``,
        ``players_to_process`` and (after a confirmed selection)
        ``projections_df``. Confirming a selection calls ``st.rerun()``.
    """
    none_label = "None of these"

    # Work on a copy so the caller's projections frame is never modified.
    projections_df = projections_df.copy()

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return projections_df

    # Collect every distinct value in the portfolio. Columns are addressed by
    # position so that duplicate column labels still yield a Series, without
    # renaming the columns of the caller's DataFrame.
    portfolio_players = set()
    for position in range(portfolio_df.shape[1]):
        portfolio_players.update(portfolio_df.iloc[:, position].unique())

    projection_players = set(projections_df['player_names'].unique())
    projection_players_list = list(projection_players)

    # Players in the portfolio that are missing from the projections.
    players_missing_from_projections = list(portfolio_players - projection_players)

    # Automatically handle 100% matches before starting the interactive process.
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue

        closest_matches = process.extract(player, projection_players_list, limit=1)
        # `closest_matches` is empty when there are no projection names at all;
        # such players simply fall through to the manual queue.
        if closest_matches and closest_matches[0][1] == 100:
            match_name = closest_matches[0][0]
            projections_df.loc[projections_df['player_names'] == match_name, 'player_names'] = player
            st.success(f"Automatically matched '{match_name}' with '{player}' (100% match)")
        else:
            players_to_process.append(player)

    # Initialise the interactive queue once per session.
    # NOTE(review): because this is gated on `current_player_index` being
    # absent, and completion below resets the index instead of deleting it, a
    # later call keeps the emptied `players_to_process` list rather than the
    # freshly computed one. Confirm with the caller whether that is intended.
    if 'current_player_index' not in st.session_state:
        st.session_state.current_player_index = 0
        st.session_state.players_to_process = players_to_process

    if not players_missing_from_projections:
        st.success("All portfolio players found in projections!")
        return projections_df

    st.warning("Players in portfolio but missing from projections")

    # Display remaining players.
    remaining_players = st.session_state.players_to_process[st.session_state.current_player_index:]
    st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
            "\n".join(f"- {player}" for player in remaining_players))

    if st.session_state.current_player_index < len(st.session_state.players_to_process):
        current_player = st.session_state.players_to_process[st.session_state.current_player_index]

        # Find the top 3 closest matches.
        closest_matches = process.extract(current_player, projection_players_list, limit=3)

        st.write(f"**Missing Player {st.session_state.current_player_index + 1} of {len(st.session_state.players_to_process)}:** {current_player}")

        # Map each displayed label back to the exact projection name so the
        # choice never has to be parsed out of the label text (names may
        # themselves contain " (").
        label_to_name = {f"{match[0]} ({match[1]}%)": match[0] for match in closest_matches}
        options = list(label_to_name)
        options.append(none_label)

        selected_option = st.radio(
            "Select correct match:",
            options,
            key=f"radio_{current_player}"
        )

        if st.button("Confirm Selection"):
            if selected_option != none_label:
                selected_name = label_to_name[selected_option]
                projections_df.loc[projections_df['player_names'] == selected_name, 'player_names'] = current_player
                st.success(f"Replaced '{selected_name}' with '{current_player}'")
                # Persist the renamed frame so it survives the rerun below.
                st.session_state['projections_df'] = projections_df
            # Move to next player.
            st.session_state.current_player_index += 1
            st.rerun()
    else:
        st.success("All players have been processed!")
        # Reset the index for future runs.
        st.session_state.current_player_index = 0
        st.session_state.players_to_process = []

    return projections_df