James McCool committed on
Commit
be8e70e
·
1 Parent(s): 507fe5f

Refactor name matching process in app.py: optimize the matching logic by processing names individually with extractOne for improved efficiency, while maintaining the handling of name-to-ID mapping and ensuring consistent debug output for better user feedback.

Browse files
Files changed (1) hide show
  1. app.py +40 -49
app.py CHANGED
@@ -145,56 +145,47 @@ with tab1:
145
 
146
  # Update projections_df with any new matches
147
  st.session_state['projections_df'] = find_name_mismatches(st.session_state['portfolio'], st.session_state['projections_df'])
148
- try:
149
- name_id_map = dict(zip(
150
- st.session_state['csv_file']['Name'],
151
- st.session_state['csv_file']['Name + ID']
152
- ))
153
- print("Using Name + ID mapping")
154
- except:
155
- name_id_map = dict(zip(
156
- st.session_state['csv_file']['Nickname'],
157
- st.session_state['csv_file']['Id']
158
- ))
159
- print("Using Nickname + Id mapping")
160
-
161
- print(f"Number of names in name_id_map: {len(name_id_map)}")
162
- print("Sample of name_id_map:", list(name_id_map.items())[:3])
163
-
164
- # Get all names at once
165
- names = projections['player_names'].tolist()
166
- choices = list(name_id_map.keys())
167
-
168
- print(f"Number of names to match: {len(names)}")
169
- print("Sample of names to match:", names[:3])
170
- print("Sample of choices:", choices[:3])
171
-
172
- # Process all names in one batch
173
- matches = process.extract(
174
- names,
175
- choices,
176
- scorer=fuzz.ratio,
177
- score_cutoff=85,
178
- limit=1 # Only get the best match
179
- )
180
-
181
- print(f"Number of matches found: {len(matches)}")
182
- print("Sample of matches:", matches[:3])
183
-
184
- # Convert matches to dictionary - Fixed the match handling
185
- match_dict = {}
186
- for name, match_list in zip(names, matches):
187
- if match_list and match_list[0][1] >= 85: # Check if we have a match and it meets threshold
188
- match_dict[name] = name_id_map[match_list[0][0]] # Use the matched name to get the ID
189
- else:
190
- match_dict[name] = name # Keep original name if no good match
191
-
192
- print(f"Number of entries in match_dict: {len(match_dict)}")
193
- print("Sample of match_dict:", list(match_dict.items())[:3])
194
 
195
- # Apply the matches
196
- projections['upload_match'] = projections['player_names'].map(match_dict)
197
- st.session_state['export_dict'] = match_dict # Use match_dict directly
198
 
199
  st.write("Export Dictionary Contents:")
200
  st.write(st.session_state['export_dict'])
 
145
 
146
  # Update projections_df with any new matches
147
  st.session_state['projections_df'] = find_name_mismatches(st.session_state['portfolio'], st.session_state['projections_df'])
148
+ if 'export_dict' not in st.session_state and csv_file is not None:
149
+ try:
150
+ name_id_map = dict(zip(
151
+ st.session_state['csv_file']['Name'],
152
+ st.session_state['csv_file']['Name + ID']
153
+ ))
154
+ print("Using Name + ID mapping")
155
+ except:
156
+ name_id_map = dict(zip(
157
+ st.session_state['csv_file']['Nickname'],
158
+ st.session_state['csv_file']['Id']
159
+ ))
160
+ print("Using Nickname + Id mapping")
161
+
162
+ # Get all names at once
163
+ names = projections['player_names'].tolist()
164
+ choices = list(name_id_map.keys())
165
+
166
+ # Create a dictionary to store matches
167
+ match_dict = {}
168
+
169
+ # Process each name individually but more efficiently
170
+ for name in names:
171
+ # Use extractOne with score_cutoff for efficiency
172
+ match = process.extractOne(
173
+ name,
174
+ choices,
175
+ score_cutoff=85
176
+ )
177
+
178
+ if match:
179
+ match_dict[name] = name_id_map[match[0]]
180
+ else:
181
+ match_dict[name] = name
182
+
183
+ print(f"Number of entries in match_dict: {len(match_dict)}")
184
+ print("Sample of match_dict:", list(match_dict.items())[:3])
 
 
 
 
 
 
 
 
 
185
 
186
+ # Apply the matches
187
+ projections['upload_match'] = projections['player_names'].map(match_dict)
188
+ st.session_state['export_dict'] = match_dict
189
 
190
  st.write("Export Dictionary Contents:")
191
  st.write(st.session_state['export_dict'])