Spaces:

Multichem-PD
/

DFS_Portfolio_Manager

Running

App Files Community

James McCool committed on May 28

Commit

730a147

1 Parent(s): 3deb246

Enhance name matching process in app.py: streamline the handling of player names by implementing a more efficient matching algorithm, updating session state management, and improving debug output for better traceability of matches.

Browse files

Files changed (1) hide show

app.py +51 -92

app.py CHANGED Viewed

@@ -135,101 +135,60 @@ with tab1:
                 projections = projections.apply(lambda x: x.replace(player_wrong_names_mlb, player_right_names_mlb))
                 st.dataframe(projections.head(10))
-    def create_site_mapping(site_csv):
-        """
-        Create a mapping dictionary from the site CSV that handles both Name and Nickname cases.
-        Args:
-            site_csv: DataFrame containing site data with either Name/Nickname and Name+ID/Id columns
-        Returns:
-            dict: Mapping of all possible name variations to their ID
-        """
-        mapping = {}
-        # Check which columns we have
-        has_name = 'Name' in site_csv.columns
-        has_nickname = 'Nickname' in site_csv.columns
-        has_name_id = 'Name + ID' in site_csv.columns
-        has_id = 'Id' in site_csv.columns
-        # Create mappings for all possible combinations
-        if has_name and has_name_id:
-            mapping.update(dict(zip(site_csv['Name'], site_csv['Name + ID'])))
-        if has_nickname and has_id:
-            mapping.update(dict(zip(site_csv['Nickname'], site_csv['Id'])))
-        return mapping
-    def standardize_names(df, name_columns, site_mapping):
-        """
-        Standardize names across a dataframe using the site mapping.
-        Args:
-            df: DataFrame containing player names
-            name_columns: List of column names containing player names
-            site_mapping: Dictionary mapping names to IDs from site CSV
-        Returns:
-            DataFrame: Updated dataframe with standardized names
-        """
-        df = df.copy()
-        # First try exact matches
-        for col in name_columns:
-            df[col] = df[col].map(lambda x: site_mapping.get(x, x))
-        # Then try fuzzy matching for any remaining unmatched names
-        unmatched = df[name_columns].apply(lambda x: x.isin(site_mapping.keys())).any(axis=1)
-        if unmatched.any():
-            for col in name_columns:
-                # Only process unmatched names
-                mask = ~df[col].isin(site_mapping.keys())
-                if mask.any():
-                    # Get fuzzy matches for unmatched names
-                    fuzzy_matches = {
-                        name: process.extractOne(name, list(site_mapping.keys()), score_cutoff=90)[0]
-                        for name in df.loc[mask, col].unique()
-                        if process.extractOne(name, list(site_mapping.keys()), score_cutoff=90)
-                    }
-                    # Apply fuzzy matches
-                    df.loc[mask, col] = df.loc[mask, col].map(lambda x: site_mapping.get(fuzzy_matches.get(x, x), x))
-        return df
-    def process_uploads(site_csv, portfolio_df, projections_df):
-        """
-        Process all three files and ensure name consistency.
-        Args:
-            site_csv: DataFrame from site CSV
-            portfolio_df: DataFrame containing portfolio data
-            projections_df: DataFrame containing projections
-        """
-        # Create site mapping
-        site_mapping = create_site_mapping(site_csv)
-        # Get portfolio columns that contain player names
-        portfolio_name_cols = [col for col in portfolio_df.columns
-                            if col not in ['salary', 'median', 'Own']]
-        # Get projections column name
-        projections_name_col = 'player_names'  # adjust if different
-        # Standardize names in both dataframes
-        portfolio_df = standardize_names(portfolio_df, portfolio_name_cols, site_mapping)
-        projections_df = standardize_names(projections_df, [projections_name_col], site_mapping)
-        return portfolio_df, projections_df
-    if portfolio_file and projections_file and csv_file:
-        # Process all files
-        portfolio_df, projections_df = process_uploads(csv_file, st.session_state['portfolio'], projections)
-        # Store in session state
-        st.session_state['portfolio'] = portfolio_df
-        st.session_state['projections_df'] = projections_df
 # with tab2:
 #     if st.button('Clear data', key='reset2'):

                 projections = projections.apply(lambda x: x.replace(player_wrong_names_mlb, player_right_names_mlb))
                 st.dataframe(projections.head(10))
+    if portfolio_file and projections_file:
+        if st.session_state['portfolio'] is not None and projections is not None:
+            st.subheader("Name Matching Analysis")
+            # Initialize projections_df in session state if it doesn't exist
+            if 'projections_df' not in st.session_state:
+                st.session_state['projections_df'] = projections.copy()
+                st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
+            # Update projections_df with any new matches
+            st.session_state['projections_df'] = find_name_mismatches(st.session_state['portfolio'], st.session_state['projections_df'])
+            try:
+                name_id_map = dict(zip(
+                    st.session_state['csv_file']['Name'],
+                    st.session_state['csv_file']['Name + ID']
+                ))
+                print("Using Name + ID mapping")
+            except:
+                name_id_map = dict(zip(
+                    st.session_state['csv_file']['Nickname'],
+                    st.session_state['csv_file']['Id']
+                ))
+                print("Using Nickname + Id mapping")
+            # Get all names at once
+            names = projections['player_names'].tolist()
+            choices = list(name_id_map.keys())
+            # Create a dictionary to store matches
+            match_dict = {}
+            # Process each name individually but more efficiently
+            for name in names:
+                # Use extractOne with score_cutoff for efficiency
+                match = process.extractOne(
+                    name,
+                    choices,
+                    score_cutoff=85
+                )
+                if match:
+                    match_dict[name] = name_id_map[match[0]]
+                else:
+                    match_dict[name] = name
+            print(f"Number of entries in match_dict: {len(match_dict)}")
+            print("Sample of match_dict:", list(match_dict.items())[:3])
+            # Apply the matches
+            projections['upload_match'] = projections['player_names'].map(match_dict)
+            st.session_state['export_dict'] = match_dict
+            st.write(st.session_state['export_dict'])
+            st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()
 # with tab2:
 #     if st.button('Clear data', key='reset2'):