James McCool committed on
Commit
8e0da46
·
1 Parent(s): cc5b9dd

Add portfolio name extraction and matching in app.py: implement a new function to retrieve unique player names from the portfolio, enhance name matching logic, and update session state with matched names for improved data consistency and analysis.

Browse files
Files changed (2) hide show
  1. app.py +36 -3
  2. global_func/get_portfolio_names.py +26 -0
app.py CHANGED
@@ -16,6 +16,7 @@ from global_func.highlight_rows import highlight_changes, highlight_changes_winn
16
  from global_func.load_csv import load_csv
17
  from global_func.find_csv_mismatches import find_csv_mismatches
18
  from global_func.trim_portfolio import trim_portfolio
 
19
 
20
  freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
21
  player_wrong_names_mlb = ['Enrique Hernandez']
@@ -139,6 +140,41 @@ with tab1:
139
  if st.session_state['portfolio'] is not None and projections is not None:
140
  st.subheader("Name Matching Analysis")
141
  # Initialize projections_df in session state if it doesn't exist
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  if 'projections_df' not in st.session_state:
143
  st.session_state['projections_df'] = projections.copy()
144
  st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
@@ -175,9 +211,6 @@ with tab1:
175
  match_dict[name] = name_id_map[match[0]]
176
  else:
177
  match_dict[name] = name
178
-
179
- print(f"Number of entries in match_dict: {len(match_dict)}")
180
- print("Sample of match_dict:", list(match_dict.items())[:3])
181
 
182
  # Apply the matches
183
  projections['upload_match'] = projections['player_names'].map(match_dict)
 
16
  from global_func.load_csv import load_csv
17
  from global_func.find_csv_mismatches import find_csv_mismatches
18
  from global_func.trim_portfolio import trim_portfolio
19
+ from global_func.get_portfolio_names import get_portfolio_names
20
 
21
  freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
22
  player_wrong_names_mlb = ['Enrique Hernandez']
 
140
  if st.session_state['portfolio'] is not None and projections is not None:
141
  st.subheader("Name Matching Analysis")
142
  # Initialize projections_df in session state if it doesn't exist
143
+ # Get unique names from portfolio
144
+ st.session_state['portfolio_names'] = get_portfolio_names(st.session_state['portfolio'])
145
+
146
+ # Get names from projections
147
+ projection_names = projections['player_names'].tolist()
148
+
149
+ # Create match dictionary for portfolio names to projection names
150
+ portfolio_match_dict = {}
151
+ for portfolio_name in st.session_state['portfolio_names']:
152
+ match = process.extractOne(
153
+ portfolio_name,
154
+ projection_names,
155
+ score_cutoff=85
156
+ )
157
+ if match:
158
+ portfolio_match_dict[portfolio_name] = match[0]
159
+ else:
160
+ portfolio_match_dict[portfolio_name] = portfolio_name
161
+
162
+ # Update portfolio with matched names
163
+ portfolio = st.session_state['portfolio'].copy()
164
+ player_columns = [col for col in portfolio.columns
165
+ if col not in ['salary', 'median', 'Own']]
166
+
167
+ # For each player column, update names using the match dictionary
168
+ for col in player_columns:
169
+ portfolio[col] = portfolio[col].map(lambda x: portfolio_match_dict.get(x, x))
170
+
171
+ # Update the portfolio in session state
172
+ st.session_state['portfolio'] = portfolio
173
+ st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()
174
+
175
+ # Store the match dictionary for reference
176
+ st.session_state['portfolio_to_projection_matches'] = portfolio_match_dict
177
+
178
  if 'projections_df' not in st.session_state:
179
  st.session_state['projections_df'] = projections.copy()
180
  st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
 
211
  match_dict[name] = name_id_map[match[0]]
212
  else:
213
  match_dict[name] = name
 
 
 
214
 
215
  # Apply the matches
216
  projections['upload_match'] = projections['player_names'].map(match_dict)
global_func/get_portfolio_names.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ from fuzzywuzzy import process
6
+
7
def get_portfolio_names(portfolio_df, exclude_columns=("salary", "median", "Own")):
    """
    Get all unique names from the portfolio dataframe's player columns.

    Every column whose label is not listed in ``exclude_columns`` is treated
    as a player column. Values are scanned row by row, duplicates are
    dropped, and null entries (``None`` / ``NaN``) are discarded.

    Args:
        portfolio_df: DataFrame containing portfolio data
        exclude_columns: Iterable of column labels that do not hold player
            names. Defaults to the ``salary``, ``median`` and ``Own`` columns.

    Returns:
        list: Unique non-null values from the player columns, in order of
            first appearance. Empty when no player columns remain.
    """
    # Build the lookup once so the membership test below is O(1) per column.
    excluded = set(exclude_columns)
    player_columns = [col for col in portfolio_df.columns if col not in excluded]

    # Nothing left to scan: answer explicitly instead of relying on how an
    # empty column selection happens to flatten.
    if not player_columns:
        return []

    # Flatten the 2-D block of player cells into one row-major sequence.
    flattened = portfolio_df[player_columns].to_numpy().ravel()

    # pd.unique keeps first-appearance order; nulls are removed afterwards so
    # a missing roster slot never shows up as a "name".
    unique_names = pd.unique(flattened)
    unique_names = unique_names[~pd.isna(unique_names)]

    return unique_names.tolist()