James McCool
committed on
Commit
·
f5bf222
1
Parent(s):
fdf735a
Refactor name matching logic in app.py: streamline the process of matching portfolio names to projection names by utilizing session state for improved clarity and efficiency, while enhancing the output messages for better user feedback.
Browse files
app.py
CHANGED
@@ -154,24 +154,25 @@ with tab1:
|
|
154 |
st.subheader("Name Matching Analysis")
|
155 |
# Initialize projections_df in session state if it doesn't exist
|
156 |
# Get unique names from portfolio
|
157 |
-
|
158 |
|
159 |
# Get names from projections
|
160 |
-
|
|
|
161 |
|
162 |
# Create match dictionary for portfolio names to projection names
|
163 |
portfolio_match_dict = {}
|
164 |
unmatched_names = []
|
165 |
-
for portfolio_name in
|
166 |
match = process.extractOne(
|
167 |
portfolio_name,
|
168 |
-
|
169 |
score_cutoff=90
|
170 |
)
|
171 |
if match:
|
172 |
portfolio_match_dict[portfolio_name] = match[0]
|
173 |
if match[1] < 100:
|
174 |
-
st.write(f"{portfolio_name} matched to {match[0]} with a score of {match[1]}%")
|
175 |
else:
|
176 |
portfolio_match_dict[portfolio_name] = portfolio_name
|
177 |
unmatched_names.append(portfolio_name)
|
@@ -184,65 +185,30 @@ with tab1:
|
|
184 |
# For each player column, update names using the match dictionary
|
185 |
for col in player_columns:
|
186 |
portfolio[col] = portfolio[col].map(lambda x: portfolio_match_dict.get(x, x))
|
187 |
-
|
188 |
-
# Update the portfolio in session state
|
189 |
st.session_state['portfolio'] = portfolio
|
190 |
|
191 |
-
#
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
st.session_state['projections_df'] = projections.copy()
|
196 |
-
|
197 |
-
try:
|
198 |
-
st.session_state['projections_df']['ownership'] = st.session_state['projections_df']['ownership'].str.replace('%', '').astype(float)
|
199 |
-
except:
|
200 |
-
pass
|
201 |
-
|
202 |
-
try:
|
203 |
-
name_id_map = dict(zip(
|
204 |
-
st.session_state['csv_file']['Name'],
|
205 |
-
st.session_state['csv_file']['Name + ID']
|
206 |
-
))
|
207 |
-
print("Using Name + ID mapping")
|
208 |
-
except:
|
209 |
-
name_id_map = dict(zip(
|
210 |
-
st.session_state['csv_file']['Nickname'],
|
211 |
-
st.session_state['csv_file']['Id']
|
212 |
-
))
|
213 |
-
print("Using Nickname + Id mapping")
|
214 |
-
|
215 |
-
# Get all names at once
|
216 |
-
names = projections['player_names'].tolist()
|
217 |
-
choices = list(name_id_map.keys())
|
218 |
-
|
219 |
-
# Create a dictionary to store matches
|
220 |
-
match_dict = {}
|
221 |
-
|
222 |
-
# Process each name individually but more efficiently
|
223 |
-
for name in names:
|
224 |
-
# Use extractOne with score_cutoff for efficiency
|
225 |
match = process.extractOne(
|
226 |
-
|
227 |
-
|
228 |
-
score_cutoff=
|
229 |
)
|
230 |
-
|
231 |
if match:
|
232 |
-
|
|
|
|
|
233 |
else:
|
234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
236 |
-
# Apply the matches
|
237 |
-
projections['upload_match'] = projections['player_names'].map(match_dict)
|
238 |
-
st.session_state['export_dict'] = match_dict
|
239 |
-
|
240 |
-
if unmatched_names:
|
241 |
-
st.warning(f"Found {len(unmatched_names)} names in portfolio without matches in projections:")
|
242 |
-
for name in unmatched_names:
|
243 |
-
st.write(f"- {name}")
|
244 |
-
else:
|
245 |
-
st.success("All portfolio names were matched to projections!")
|
246 |
working_frame = st.session_state['portfolio'].copy()
|
247 |
st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()
|
248 |
|
@@ -1009,7 +975,7 @@ with tab2:
|
|
1009 |
submitted = st.form_submit_button("Trim")
|
1010 |
if submitted:
|
1011 |
st.write('initiated')
|
1012 |
-
st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], map_dict, site_var, type_var, Contest_Size, strength_var, sport_var)
|
1013 |
if 'trimming_dict_maxes' not in st.session_state:
|
1014 |
st.session_state['trimming_dict_maxes'] = {
|
1015 |
'Own': st.session_state['working_frame']['Own'].max(),
|
|
|
154 |
st.subheader("Name Matching Analysis")
|
155 |
# Initialize projections_df in session state if it doesn't exist
|
156 |
# Get unique names from portfolio
|
157 |
+
portfolio_names = get_portfolio_names(st.session_state['portfolio'])
|
158 |
|
159 |
# Get names from projections
|
160 |
+
csv_names = st.session_state['csv_file']['Name'].tolist()
|
161 |
+
projection_names = projections_file['player_names'].tolist()
|
162 |
|
163 |
# Create match dictionary for portfolio names to projection names
|
164 |
portfolio_match_dict = {}
|
165 |
unmatched_names = []
|
166 |
+
for portfolio_name in portfolio_names:
|
167 |
match = process.extractOne(
|
168 |
portfolio_name,
|
169 |
+
csv_names,
|
170 |
score_cutoff=90
|
171 |
)
|
172 |
if match:
|
173 |
portfolio_match_dict[portfolio_name] = match[0]
|
174 |
if match[1] < 100:
|
175 |
+
st.write(f"{portfolio_name} matched to site csv {match[0]} with a score of {match[1]}%")
|
176 |
else:
|
177 |
portfolio_match_dict[portfolio_name] = portfolio_name
|
178 |
unmatched_names.append(portfolio_name)
|
|
|
185 |
# For each player column, update names using the match dictionary
|
186 |
for col in player_columns:
|
187 |
portfolio[col] = portfolio[col].map(lambda x: portfolio_match_dict.get(x, x))
|
|
|
|
|
188 |
st.session_state['portfolio'] = portfolio
|
189 |
|
190 |
+
# Create match dictionary for projection names to site csv names
|
191 |
+
projections_match_dict = {}
|
192 |
+
unmatched_proj_names = []
|
193 |
+
for projections_name in projection_names:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
match = process.extractOne(
|
195 |
+
projections_name,
|
196 |
+
csv_names,
|
197 |
+
score_cutoff=90
|
198 |
)
|
|
|
199 |
if match:
|
200 |
+
projections_match_dict[projections_name] = match[0]
|
201 |
+
if match[1] < 100:
|
202 |
+
st.write(f"{projections_name} matched to site csv {match[0]} with a score of {match[1]}%")
|
203 |
else:
|
204 |
+
projections_match_dict[projections_name] = projections_name
|
205 |
+
unmatched_proj_names.append(projections_name)
|
206 |
+
|
207 |
+
# Update projections with matched names
|
208 |
+
projections = st.session_state['projections_df'].copy()
|
209 |
+
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict.get(x, x))
|
210 |
+
st.session_state['projections_df'] = projections
|
211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
working_frame = st.session_state['portfolio'].copy()
|
213 |
st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()
|
214 |
|
|
|
975 |
submitted = st.form_submit_button("Trim")
|
976 |
if submitted:
|
977 |
st.write('initiated')
|
978 |
+
st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
|
979 |
if 'trimming_dict_maxes' not in st.session_state:
|
980 |
st.session_state['trimming_dict_maxes'] = {
|
981 |
'Own': st.session_state['working_frame']['Own'].max(),
|