File size: 4,277 Bytes
d04558f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import streamlit as st
import numpy as np
import pandas as pd
from fuzzywuzzy import process

def find_csv_mismatches(csv_df, projections_df):
    """Reconcile player names in a CSV with the names used in projections.

    Names that appear in ``csv_df['Name']`` but not in
    ``projections_df['player_names']`` are fuzzy-matched against the
    projection names. A candidate scoring 100 is applied automatically;
    every other name is presented to the user one at a time through
    Streamlit widgets, and the confirmed choice replaces the CSV name.

    Args:
        csv_df: DataFrame expected to contain a ``'Name'`` column.
        projections_df: DataFrame expected to contain a ``'player_names'``
            column.

    Returns:
        A copy of ``csv_df`` with matched names rewritten. If either
        required column is missing, an error is shown and the unmodified
        copy is returned.

    Side effects:
        Reads and writes ``st.session_state.csv_current_player_index`` and
        ``st.session_state.csv_players_to_process`` to track progress across
        Streamlit reruns, stores the updated frame under
        ``st.session_state['csv_file']`` when a selection is confirmed, and
        calls ``st.rerun()`` after each confirmation.
    """
    # Work on a copy so the caller's frame is never mutated.
    csv_df = csv_df.copy()

    if 'Name' not in csv_df.columns:
        st.error("No 'Name' column found in CSV file")
        return csv_df

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return csv_df

    # Unique, non-null names on each side.
    csv_players = set(csv_df['Name'].dropna().unique())
    projection_players = set(projections_df['player_names'].dropna().unique())

    # Fuzzy-match candidates are the *projection* names; only strings can be
    # scored, and sorting keeps the candidate order stable between reruns.
    projection_players_list = sorted(
        name for name in projection_players if isinstance(name, str)
    )

    # Players in the CSV that are missing from projections. Sorted (by string
    # form, so mixed types cannot break the comparison) because the order is
    # persisted in session state and must not depend on set iteration order.
    players_missing_from_projections = sorted(
        csv_players - projection_players, key=str
    )

    # Automatically handle 100% matches before starting interactive process
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue
        closest_matches = process.extract(player, projection_players_list, limit=1)
        # Guard against an empty candidate list before indexing the result.
        if closest_matches and closest_matches[0][1] == 100:
            match_name = closest_matches[0][0]
            # Update CSV DataFrame to use the projection name
            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
        else:
            players_to_process.append(player)

    # Initialize session state for tracking current player if not exists
    if 'csv_current_player_index' not in st.session_state:
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = players_to_process

    if not players_missing_from_projections:
        st.success("All CSV players found in projections!")
        return csv_df

    st.warning("Players in CSV but missing from projections")

    pending = st.session_state.csv_players_to_process
    current_index = st.session_state.csv_current_player_index

    # Display remaining players
    remaining_players = pending[current_index:]
    st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
            "\n".join(f"- {player}" for player in remaining_players))

    if current_index >= len(pending):
        st.success("All players have been processed!")
        # Reset the index for future runs
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = []
        return csv_df

    current_player = pending[current_index]

    # Find the top 3 closest matches
    closest_matches = process.extract(current_player, projection_players_list, limit=3)

    st.write(f"**Missing Player {current_index + 1} of {len(pending)}:** {current_player}")

    # Map each radio label back to its projection name so the chosen name is
    # recovered exactly, even when the name itself contains " (".
    label_to_name = {f"{name} ({score}%)": name for name, score in closest_matches}
    options = list(label_to_name)
    options.append("None of these")

    selected_option = st.radio(
        "Select correct match:",
        options,
        key=f"csv_radio_{current_player}"
    )

    if st.button("Confirm Selection", key="csv_confirm"):
        if selected_option in label_to_name:
            selected_name = label_to_name[selected_option]
            # Update CSV DataFrame
            csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
            st.success(f"Replaced '{current_player}' with '{selected_name}'")
            st.session_state['csv_file'] = csv_df

        # Move to next player
        st.session_state.csv_current_player_index += 1
        st.rerun()

    return csv_df