James McCool committed on
Commit
f5bf222
·
1 Parent(s): fdf735a

Refactor name matching logic in app.py: streamline the process of matching portfolio names to projection names by utilizing session state for improved clarity and efficiency, while enhancing the output messages for better user feedback.

Browse files
Files changed (1) hide show
  1. app.py +24 -58
app.py CHANGED
@@ -154,24 +154,25 @@ with tab1:
154
  st.subheader("Name Matching Analysis")
155
  # Initialize projections_df in session state if it doesn't exist
156
  # Get unique names from portfolio
157
- st.session_state['portfolio_names'] = get_portfolio_names(st.session_state['portfolio'])
158
 
159
  # Get names from projections
160
- projection_names = projections['player_names'].tolist()
 
161
 
162
  # Create match dictionary for portfolio names to projection names
163
  portfolio_match_dict = {}
164
  unmatched_names = []
165
- for portfolio_name in st.session_state['portfolio_names']:
166
  match = process.extractOne(
167
  portfolio_name,
168
- projection_names,
169
  score_cutoff=90
170
  )
171
  if match:
172
  portfolio_match_dict[portfolio_name] = match[0]
173
  if match[1] < 100:
174
- st.write(f"{portfolio_name} matched to {match[0]} with a score of {match[1]}%")
175
  else:
176
  portfolio_match_dict[portfolio_name] = portfolio_name
177
  unmatched_names.append(portfolio_name)
@@ -184,65 +185,30 @@ with tab1:
184
  # For each player column, update names using the match dictionary
185
  for col in player_columns:
186
  portfolio[col] = portfolio[col].map(lambda x: portfolio_match_dict.get(x, x))
187
-
188
- # Update the portfolio in session state
189
  st.session_state['portfolio'] = portfolio
190
 
191
- # Store the match dictionary for reference
192
- st.session_state['portfolio_to_projection_matches'] = portfolio_match_dict
193
-
194
- if 'projections_df' not in st.session_state:
195
- st.session_state['projections_df'] = projections.copy()
196
-
197
- try:
198
- st.session_state['projections_df']['ownership'] = st.session_state['projections_df']['ownership'].str.replace('%', '').astype(float)
199
- except:
200
- pass
201
-
202
- try:
203
- name_id_map = dict(zip(
204
- st.session_state['csv_file']['Name'],
205
- st.session_state['csv_file']['Name + ID']
206
- ))
207
- print("Using Name + ID mapping")
208
- except:
209
- name_id_map = dict(zip(
210
- st.session_state['csv_file']['Nickname'],
211
- st.session_state['csv_file']['Id']
212
- ))
213
- print("Using Nickname + Id mapping")
214
-
215
- # Get all names at once
216
- names = projections['player_names'].tolist()
217
- choices = list(name_id_map.keys())
218
-
219
- # Create a dictionary to store matches
220
- match_dict = {}
221
-
222
- # Process each name individually but more efficiently
223
- for name in names:
224
- # Use extractOne with score_cutoff for efficiency
225
  match = process.extractOne(
226
- name,
227
- choices,
228
- score_cutoff=85
229
  )
230
-
231
  if match:
232
- match_dict[name] = name_id_map[match[0]]
 
 
233
  else:
234
- match_dict[name] = name
 
 
 
 
 
 
235
 
236
- # Apply the matches
237
- projections['upload_match'] = projections['player_names'].map(match_dict)
238
- st.session_state['export_dict'] = match_dict
239
-
240
- if unmatched_names:
241
- st.warning(f"Found {len(unmatched_names)} names in portfolio without matches in projections:")
242
- for name in unmatched_names:
243
- st.write(f"- {name}")
244
- else:
245
- st.success("All portfolio names were matched to projections!")
246
  working_frame = st.session_state['portfolio'].copy()
247
  st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()
248
 
@@ -1009,7 +975,7 @@ with tab2:
1009
  submitted = st.form_submit_button("Trim")
1010
  if submitted:
1011
  st.write('initiated')
1012
- st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], map_dict, site_var, type_var, Contest_Size, strength_var, sport_var)
1013
  if 'trimming_dict_maxes' not in st.session_state:
1014
  st.session_state['trimming_dict_maxes'] = {
1015
  'Own': st.session_state['working_frame']['Own'].max(),
 
154
  st.subheader("Name Matching Analysis")
155
  # Initialize projections_df in session state if it doesn't exist
156
  # Get unique names from portfolio
157
+ portfolio_names = get_portfolio_names(st.session_state['portfolio'])
158
 
159
  # Get names from projections
160
+ csv_names = st.session_state['csv_file']['Name'].tolist()
161
+ projection_names = projections_file['player_names'].tolist()
162
 
163
  # Create match dictionary for portfolio names to projection names
164
  portfolio_match_dict = {}
165
  unmatched_names = []
166
+ for portfolio_name in portfolio_names:
167
  match = process.extractOne(
168
  portfolio_name,
169
+ csv_names,
170
  score_cutoff=90
171
  )
172
  if match:
173
  portfolio_match_dict[portfolio_name] = match[0]
174
  if match[1] < 100:
175
+ st.write(f"{portfolio_name} matched to site csv {match[0]} with a score of {match[1]}%")
176
  else:
177
  portfolio_match_dict[portfolio_name] = portfolio_name
178
  unmatched_names.append(portfolio_name)
 
185
  # For each player column, update names using the match dictionary
186
  for col in player_columns:
187
  portfolio[col] = portfolio[col].map(lambda x: portfolio_match_dict.get(x, x))
 
 
188
  st.session_state['portfolio'] = portfolio
189
 
190
+ # Create match dictionary for projection names to site csv names
191
+ projections_match_dict = {}
192
+ unmatched_proj_names = []
193
+ for projections_name in projection_names:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  match = process.extractOne(
195
+ projections_name,
196
+ csv_names,
197
+ score_cutoff=90
198
  )
 
199
  if match:
200
+ projections_match_dict[projections_name] = match[0]
201
+ if match[1] < 100:
202
+ st.write(f"{projections_name} matched to site csv {match[0]} with a score of {match[1]}%")
203
  else:
204
+ projections_match_dict[projections_name] = projections_name
205
+ unmatched_proj_names.append(projections_name)
206
+
207
+ # Update projections with matched names
208
+ projections = st.session_state['projections_df'].copy()
209
+ projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict.get(x, x))
210
+ st.session_state['projections_df'] = projections
211
 
 
 
 
 
 
 
 
 
 
 
212
  working_frame = st.session_state['portfolio'].copy()
213
  st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()
214
 
 
975
  submitted = st.form_submit_button("Trim")
976
  if submitted:
977
  st.write('initiated')
978
+ st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
979
  if 'trimming_dict_maxes' not in st.session_state:
980
  st.session_state['trimming_dict_maxes'] = {
981
  'Own': st.session_state['working_frame']['Own'].max(),