|
import streamlit as st |
|
import numpy as np |
|
import pandas as pd |
|
from fuzzywuzzy import process |
|
|
|
def find_csv_mismatches(csv_df, projections_df):
    """Reconcile the ``Name`` column of a CSV frame with projection names.

    Names that appear in ``csv_df['Name']`` but not in
    ``projections_df['player_names']`` are fuzzy-matched against the
    projection names.  A 100% match is applied automatically; every other
    name is queued in ``st.session_state`` and presented to the user one at
    a time (radio + confirm button) across Streamlit reruns.

    Args:
        csv_df: DataFrame expected to contain a ``Name`` column.
        projections_df: DataFrame expected to contain a ``player_names``
            column.

    Returns:
        A copy of ``csv_df`` with any matched names replaced.  If either
        required column is missing, an error is shown and the unmodified
        copy is returned.

    Side effects:
        Reads and writes ``st.session_state.csv_current_player_index`` and
        ``st.session_state.csv_players_to_process``; stores the updated
        frame under ``st.session_state['csv_file']`` and calls
        ``st.rerun()`` when the user confirms a selection.
    """
    # Work on copies so the caller's frames are never mutated.
    csv_df = csv_df.copy()
    projections_df = projections_df.copy()

    if 'Name' not in csv_df.columns:
        st.error("No 'Name' column found in CSV file")
        return csv_df

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return csv_df

    csv_players = set(csv_df['Name'].dropna())
    projection_players = set(projections_df['player_names'].dropna())

    # Fuzzy-match candidates must be strings; sort for a stable option order.
    projection_players_list = sorted(
        name for name in projection_players if isinstance(name, str)
    )

    # NOTE(review): the previous code computed `projection_players -
    # csv_players` and matched against the CSV names, which made both
    # `csv_df.loc[csv_df['Name'] == player, ...]` renames below no-ops
    # (the looked-up name was by construction absent from the CSV).  The
    # direction used here follows the variable names and UI messages
    # ("Players in CSV but missing from projections") -- confirm with callers.
    players_missing_from_projections = sorted(
        csv_players - projection_players, key=str
    )

    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue

        # Guard the empty-candidate case: there is nothing to match against
        # and indexing the result would raise IndexError.
        closest_matches = (
            process.extract(player, projection_players_list, limit=1)
            if projection_players_list
            else []
        )
        if closest_matches and closest_matches[0][1] == 100:
            match_name = closest_matches[0][0]
            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
        else:
            players_to_process.append(player)

    # Initialise the review queue once; later reruns keep walking the same
    # queue via the stored index.  Both keys are checked so a partially
    # populated session state cannot cause an AttributeError below.
    if (
        'csv_current_player_index' not in st.session_state
        or 'csv_players_to_process' not in st.session_state
    ):
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = players_to_process

    if not players_missing_from_projections:
        st.success("All CSV players found in projections!")
        return csv_df

    st.warning("Players in CSV but missing from projections")

    queue = st.session_state.csv_players_to_process
    position = st.session_state.csv_current_player_index

    remaining_players = queue[position:]
    st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
            "\n".join(f"- {player}" for player in remaining_players))

    if position >= len(queue):
        st.success("All players have been processed!")
        # Reset the queue state once everything has been handled.
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = []
        return csv_df

    current_player = queue[position]

    closest_matches = (
        process.extract(current_player, projection_players_list, limit=3)
        if projection_players_list
        else []
    )

    st.write(f"**Missing Player {position + 1} of {len(queue)}:** {current_player}")

    # Map each displayed label back to its candidate name instead of parsing
    # the label, so names that themselves contain " (" are not truncated.
    label_to_name = {
        f"{match[0]} ({match[1]}%)": match[0] for match in closest_matches
    }
    options = list(label_to_name)
    options.append("None of these")

    selected_option = st.radio(
        "Select correct match:",
        options,
        key=f"csv_radio_{current_player}"
    )

    if st.button("Confirm Selection", key="csv_confirm"):
        selected_name = label_to_name.get(selected_option)
        if selected_name is not None:
            csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
            st.success(f"Replaced '{current_player}' with '{selected_name}'")
            st.session_state['csv_file'] = csv_df

        # Advance to the next queued player whether or not a match was chosen.
        st.session_state.csv_current_player_index += 1
        st.rerun()

    return csv_df