DFS_Contest_Analyzer / global_func /find_name_mismatches.py
James McCool
Fix logic in `find_name_mismatches.py` to correctly identify players missing from projections
16d2d15
raw
history blame
4.45 kB
import streamlit as st
import numpy as np
import pandas as pd
import time
from fuzzywuzzy import process
def find_name_mismatches(contest_df: pd.DataFrame, projections_df: pd.DataFrame):
    """Reconcile player names used in the contest data with the projections.

    Every column of ``contest_df`` other than ``BaseName`` and ``EntryCount``
    is treated as a column of player names.  Names that occur in those columns
    but not in ``projections_df['player_names']`` are fuzzy-matched against
    the projection names:

    * a match scoring 100 is applied automatically;
    * any other name is queued in ``st.session_state.players_to_process`` and
      resolved one player per Streamlit rerun via a radio button plus a
      "Confirm Selection" button.

    Args:
        contest_df: Contest/portfolio data.  The caller's object is not
            mutated; a copy is edited and returned.
        projections_df: Projections data.  Must contain a ``player_names``
            column, otherwise an error is shown and the copies are returned
            unchanged.

    Returns:
        A ``(contest_df, projections_df)`` tuple of copies.  Matched names in
        the contest copy are rewritten to the projection spelling.  When a
        manual selection is confirmed, the updated contest copy is stored in
        ``st.session_state['contest_df']`` and ``st.rerun()`` is called, so
        the function does not return on that run.
    """
    # Work on copies so the caller's dataframes are never modified.
    projections_df = projections_df.copy()
    contest_df = contest_df.copy()

    name_columns = [
        col for col in contest_df.columns
        if col not in ('BaseName', 'EntryCount')
    ]

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return contest_df, projections_df

    # Unique names on each side.
    portfolio_players = set()
    for col in name_columns:
        portfolio_players.update(contest_df[col].unique())
    projection_players = set(projections_df['player_names'].unique())

    # Only strings are usable as fuzzy-match candidates; NaN/None cells from
    # ``unique()`` are dropped.  Sorting keeps tie-breaking between equally
    # scored candidates stable.
    projection_players_list = sorted(
        name for name in projection_players if isinstance(name, str)
    )

    # Names present in the portfolio but absent from projections.  The
    # direction matters: only names that actually occur in contest_df can be
    # rewritten by the ``replace`` calls below.
    players_missing_from_projections = list(portfolio_players - projection_players)

    # Apply perfect fuzzy matches immediately; queue the rest for manual review.
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue

        closest_matches = process.extract(player, projection_players_list, limit=1)
        # ``closest_matches`` is empty when there are no candidates at all.
        if closest_matches and closest_matches[0][1] == 100:
            match_name = closest_matches[0][0]
            for col in name_columns:
                contest_df[col] = contest_df[col].replace(player, match_name)
            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
        else:
            players_to_process.append(player)

    # The queue and cursor live in session state so they survive reruns.
    # NOTE(review): the queue is only seeded while 'current_player_index' is
    # absent; after the reset further down it stays empty for the rest of the
    # session.  Confirm with the caller whether a new upload should re-seed it.
    if 'current_player_index' not in st.session_state:
        st.session_state.current_player_index = 0
        st.session_state.players_to_process = players_to_process

    if not players_missing_from_projections:
        st.success("All portfolio players found in projections!")
        return contest_df, projections_df

    st.warning("Players in portfolio but missing from projections")

    queue = st.session_state.players_to_process
    position = st.session_state.current_player_index

    remaining_players = queue[position:]
    st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
            "\n".join(f"- {player}" for player in remaining_players))

    if position < len(queue):
        current_player = queue[position]

        # Offer the three closest projection names plus an opt-out.
        closest_matches = process.extract(current_player, projection_players_list, limit=3)
        st.write(f"**Missing Player {position + 1} of {len(queue)}:** {current_player}")

        # Map each label back to its name so the name never has to be parsed
        # out of the label (names may themselves contain " (").
        option_to_name = {
            f"{match[0]} ({match[1]}%)": match[0] for match in closest_matches
        }
        options = list(option_to_name)
        options.append("None of these")

        selected_option = st.radio(
            "Select correct match:",
            options,
            key=f"radio_{current_player}"
        )

        if st.button("Confirm Selection"):
            selected_name = option_to_name.get(selected_option)
            if selected_name is not None:
                for col in name_columns:
                    contest_df[col] = contest_df[col].replace(current_player, selected_name)
                st.success(f"Replaced '{current_player}' with '{selected_name}'")
                st.session_state['contest_df'] = contest_df
            # Advance to the next queued player and redraw.
            st.session_state.current_player_index += 1
            st.rerun()
    else:
        st.success("All players have been processed!")
        # Reset the cursor for future runs.
        st.session_state.current_player_index = 0
        st.session_state.players_to_process = []

    return contest_df, projections_df