James McCool
committed on
Commit
·
8e0da46
1
Parent(s):
cc5b9dd
Add portfolio name extraction and matching in app.py: implement a new function to retrieve unique player names from the portfolio, enhance name matching logic, and update session state with matched names for improved data consistency and analysis.
Browse files- app.py +36 -3
- global_func/get_portfolio_names.py +26 -0
app.py
CHANGED
@@ -16,6 +16,7 @@ from global_func.highlight_rows import highlight_changes, highlight_changes_winn
|
|
16 |
from global_func.load_csv import load_csv
|
17 |
from global_func.find_csv_mismatches import find_csv_mismatches
|
18 |
from global_func.trim_portfolio import trim_portfolio
|
|
|
19 |
|
20 |
freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
|
21 |
player_wrong_names_mlb = ['Enrique Hernandez']
|
@@ -139,6 +140,41 @@ with tab1:
|
|
139 |
if st.session_state['portfolio'] is not None and projections is not None:
|
140 |
st.subheader("Name Matching Analysis")
|
141 |
# Initialize projections_df in session state if it doesn't exist
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
if 'projections_df' not in st.session_state:
|
143 |
st.session_state['projections_df'] = projections.copy()
|
144 |
st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
|
@@ -175,9 +211,6 @@ with tab1:
|
|
175 |
match_dict[name] = name_id_map[match[0]]
|
176 |
else:
|
177 |
match_dict[name] = name
|
178 |
-
|
179 |
-
print(f"Number of entries in match_dict: {len(match_dict)}")
|
180 |
-
print("Sample of match_dict:", list(match_dict.items())[:3])
|
181 |
|
182 |
# Apply the matches
|
183 |
projections['upload_match'] = projections['player_names'].map(match_dict)
|
|
|
16 |
from global_func.load_csv import load_csv
|
17 |
from global_func.find_csv_mismatches import find_csv_mismatches
|
18 |
from global_func.trim_portfolio import trim_portfolio
|
19 |
+
from global_func.get_portfolio_names import get_portfolio_names
|
20 |
|
21 |
freq_format = {'Finish_percentile': '{:.2%}', 'Lineup Edge': '{:.2%}', 'Win%': '{:.2%}'}
|
22 |
player_wrong_names_mlb = ['Enrique Hernandez']
|
|
|
140 |
if st.session_state['portfolio'] is not None and projections is not None:
|
141 |
st.subheader("Name Matching Analysis")
|
142 |
# Initialize projections_df in session state if it doesn't exist
|
143 |
+
# Get unique names from portfolio
|
144 |
+
st.session_state['portfolio_names'] = get_portfolio_names(st.session_state['portfolio'])
|
145 |
+
|
146 |
+
# Get names from projections
|
147 |
+
projection_names = projections['player_names'].tolist()
|
148 |
+
|
149 |
+
# Create match dictionary for portfolio names to projection names
|
150 |
+
portfolio_match_dict = {}
|
151 |
+
for portfolio_name in st.session_state['portfolio_names']:
|
152 |
+
match = process.extractOne(
|
153 |
+
portfolio_name,
|
154 |
+
projection_names,
|
155 |
+
score_cutoff=85
|
156 |
+
)
|
157 |
+
if match:
|
158 |
+
portfolio_match_dict[portfolio_name] = match[0]
|
159 |
+
else:
|
160 |
+
portfolio_match_dict[portfolio_name] = portfolio_name
|
161 |
+
|
162 |
+
# Update portfolio with matched names
|
163 |
+
portfolio = st.session_state['portfolio'].copy()
|
164 |
+
player_columns = [col for col in portfolio.columns
|
165 |
+
if col not in ['salary', 'median', 'Own']]
|
166 |
+
|
167 |
+
# For each player column, update names using the match dictionary
|
168 |
+
for col in player_columns:
|
169 |
+
portfolio[col] = portfolio[col].map(lambda x: portfolio_match_dict.get(x, x))
|
170 |
+
|
171 |
+
# Update the portfolio in session state
|
172 |
+
st.session_state['portfolio'] = portfolio
|
173 |
+
st.session_state['origin_portfolio'] = st.session_state['portfolio'].copy()
|
174 |
+
|
175 |
+
# Store the match dictionary for reference
|
176 |
+
st.session_state['portfolio_to_projection_matches'] = portfolio_match_dict
|
177 |
+
|
178 |
if 'projections_df' not in st.session_state:
|
179 |
st.session_state['projections_df'] = projections.copy()
|
180 |
st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
|
|
|
211 |
match_dict[name] = name_id_map[match[0]]
|
212 |
else:
|
213 |
match_dict[name] = name
|
|
|
|
|
|
|
214 |
|
215 |
# Apply the matches
|
216 |
projections['upload_match'] = projections['player_names'].map(match_dict)
|
global_func/get_portfolio_names.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
from fuzzywuzzy import process
|
6 |
+
|
7 |
+
def get_portfolio_names(portfolio_df, exclude_columns=None):
    """
    Get all unique names from the portfolio dataframe's player columns.

    Every column whose label is not listed in ``exclude_columns`` is treated
    as a player column. Values are collected row by row, de-duplicated in
    order of first appearance, and missing entries are discarded.

    Args:
        portfolio_df: DataFrame containing portfolio data
        exclude_columns: Optional iterable of column labels that do not hold
            player names. Defaults to ``('salary', 'median', 'Own')``.

    Returns:
        list: List of unique player names, in order of first appearance.
            NaN/None values and blank (empty or whitespace-only) strings
            are omitted.
    """
    if exclude_columns is None:
        exclude_columns = ('salary', 'median', 'Own')
    excluded = set(exclude_columns)

    # Get columns that contain player names (excluding non-player columns)
    player_columns = [col for col in portfolio_df.columns if col not in excluded]
    if not player_columns:
        # Nothing to scan: avoid handing a zero-width frame to pd.unique
        return []

    # Flatten row-major so first-appearance order follows the lineup rows
    flat_values = portfolio_df[player_columns].to_numpy().ravel()
    unique_names = pd.unique(flat_values)  # Remove duplicates, keep order
    unique_names = unique_names[~pd.isna(unique_names)]  # Remove NaN/None

    # A blank cell is not a player name; returning it would only produce a
    # meaningless lookup key for callers that fuzzy-match these names.
    return [
        name for name in unique_names
        if not (isinstance(name, str) and not name.strip())
    ]
|