File size: 4,277 Bytes
d04558f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import streamlit as st
import numpy as np
import pandas as pd
from fuzzywuzzy import process
def find_csv_mismatches(csv_df, projections_df):
    """Reconcile player names in an uploaded CSV with the projection names.

    Names found in ``csv_df['Name']`` that do not appear in
    ``projections_df['player_names']`` are fuzzy-matched against the
    projection names. A 100% match is applied automatically; every other
    name is presented to the user one at a time (radio + confirm button)
    so the correct projection name can be picked.

    Args:
        csv_df: DataFrame expected to contain a ``'Name'`` column.
        projections_df: DataFrame expected to contain a ``'player_names'``
            column. It is only read, never modified.

    Returns:
        A copy of ``csv_df`` whose ``'Name'`` values have been replaced by
        the matched projection names. If either required column is missing
        an error is shown and the unmodified copy is returned.

    Side effects:
        Reads/writes ``st.session_state.csv_current_player_index`` and
        ``st.session_state.csv_players_to_process`` to track progress across
        reruns, stores the updated frame in ``st.session_state['csv_file']``
        when a match is confirmed, and calls ``st.rerun()`` after each
        confirmation.
    """
    # Work on a copy so the caller's frame is never mutated.
    csv_df = csv_df.copy()

    if 'Name' not in csv_df.columns:
        st.error("No 'Name' column found in CSV file")
        return csv_df

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return csv_df

    # Get unique player names from CSV and projections
    csv_players = set(csv_df['Name'].dropna().unique())
    projection_players = set(projections_df['player_names'].dropna().unique())

    # Fuzzy-match candidates are the *projection* names; only strings can be
    # scored, so anything else is dropped. Sorted for a stable option order.
    projection_players_list = sorted(
        name for name in projection_players if isinstance(name, str)
    )

    # Find players in CSV that are missing from projections. Sorting (by
    # string form, in case of mixed types) keeps the processing order
    # deterministic instead of depending on set iteration order.
    players_missing_from_projections = sorted(
        csv_players - projection_players, key=str
    )

    # Automatically handle 100% matches before starting interactive process
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue

        closest_matches = process.extract(player, projection_players_list, limit=1)
        # Guard against an empty result (no candidates to match against).
        if closest_matches and closest_matches[0][1] == 100:
            match_name = closest_matches[0][0]
            # Update CSV DataFrame to use the projection name
            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
        else:
            players_to_process.append(player)

    # Initialize session state for tracking current player if not exists.
    # The work list is frozen on first call so the index stays valid across
    # reruns even though the recomputed list may shrink.
    if 'csv_current_player_index' not in st.session_state:
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = players_to_process

    # Display results
    if players_missing_from_projections:
        st.warning("Players in CSV but missing from projections")

        # Display remaining players
        pending = st.session_state.csv_players_to_process
        position = st.session_state.csv_current_player_index
        remaining_players = pending[position:]
        st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
                "\n".join(f"- {player}" for player in remaining_players))

        if position < len(pending):
            current_player = pending[position]

            # Find the top 3 closest matches
            closest_matches = process.extract(current_player, projection_players_list, limit=3)

            st.write(f"**Missing Player {position + 1} of {len(pending)}:** {current_player}")

            # Map each radio label back to its name so the selection never
            # has to be parsed out of the label text (names may contain " (").
            option_to_name = {
                f"{match[0]} ({match[1]}%)": match[0] for match in closest_matches
            }
            options = list(option_to_name)
            options.append("None of these")

            selected_option = st.radio(
                "Select correct match:",
                options,
                key=f"csv_radio_{current_player}"
            )

            if st.button("Confirm Selection", key="csv_confirm"):
                if selected_option != "None of these":
                    selected_name = option_to_name[selected_option]
                    # Update CSV DataFrame
                    csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
                    st.success(f"Replaced '{current_player}' with '{selected_name}'")
                    st.session_state['csv_file'] = csv_df

                # Move to next player
                st.session_state.csv_current_player_index += 1
                st.rerun()
        else:
            st.success("All players have been processed!")
            # Reset the index for future runs
            # NOTE(review): the index key stays in session_state, so the work
            # list is not rebuilt on later calls (e.g. a new upload) - confirm
            # whether the caller clears these keys.
            st.session_state.csv_current_player_index = 0
            st.session_state.csv_players_to_process = []
    else:
        st.success("All CSV players found in projections!")

    return csv_df