James McCool
committed on
Commit
·
356c7d4
1
Parent(s):
b9bf803
Refactor contest data handling in `app.py` and `find_name_mismatches.py` for improved functionality
Browse files
- Removed the position dictionary from the data returned by `load_file`, simplifying the data structure.
- Updated the `find_name_mismatches` function to use `contest_df` instead of `portfolio_df`, enhancing clarity in variable naming.
- Adjusted the logic in `app.py` to ensure proper handling of projections and contest data, including the addition of name matching analysis.
- app.py +10 -33
- global_func/find_name_mismatches.py +4 -5
- global_func/load_file.py +1 -2
app.py
CHANGED
@@ -31,7 +31,7 @@ with tab1:
|
|
31 |
del st.session_state['Contest']
|
32 |
|
33 |
if Contest_file:
|
34 |
-
st.session_state['Contest'], st.session_state['
|
35 |
st.session_state['Contest'] = st.session_state['Contest'].dropna(how='all')
|
36 |
st.session_state['Contest'] = st.session_state['Contest'].reset_index(drop=True)
|
37 |
if st.session_state['Contest'] is not None:
|
@@ -67,39 +67,16 @@ with tab1:
|
|
67 |
st.success('Projections file loaded successfully!')
|
68 |
st.dataframe(projections.head(10))
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
# if csv_file is not None and 'export_dict' not in st.session_state:
|
81 |
-
# # Create a dictionary of Name to Name+ID from csv_file
|
82 |
-
# try:
|
83 |
-
# name_id_map = dict(zip(
|
84 |
-
# st.session_state['csv_file']['Name'],
|
85 |
-
# st.session_state['csv_file']['Name + ID']
|
86 |
-
# ))
|
87 |
-
# except:
|
88 |
-
# name_id_map = dict(zip(
|
89 |
-
# st.session_state['csv_file']['Nickname'],
|
90 |
-
# st.session_state['csv_file']['Id']
|
91 |
-
# ))
|
92 |
-
|
93 |
-
# # Function to find best match
|
94 |
-
# def find_best_match(name):
|
95 |
-
# best_match = process.extractOne(name, name_id_map.keys())
|
96 |
-
# if best_match and best_match[1] >= 85: # 85% match threshold
|
97 |
-
# return name_id_map[best_match[0]]
|
98 |
-
# return name # Return original name if no good match found
|
99 |
-
|
100 |
-
# # Apply the matching
|
101 |
-
# projections['upload_match'] = projections['player_names'].apply(find_best_match)
|
102 |
-
# st.session_state['export_dict'] = dict(zip(projections['player_names'], projections['upload_match']))
|
103 |
|
104 |
with tab2:
|
105 |
if st.button('Clear data', key='reset3'):
|
|
|
31 |
del st.session_state['Contest']
|
32 |
|
33 |
if Contest_file:
|
34 |
+
st.session_state['Contest'], st.session_state['ownership_dict'], st.session_state['entry_list'] = load_file(Contest_file)
|
35 |
st.session_state['Contest'] = st.session_state['Contest'].dropna(how='all')
|
36 |
st.session_state['Contest'] = st.session_state['Contest'].reset_index(drop=True)
|
37 |
if st.session_state['Contest'] is not None:
|
|
|
67 |
st.success('Projections file loaded successfully!')
|
68 |
st.dataframe(projections.head(10))
|
69 |
|
70 |
+
if Contest_file and projections_file:
|
71 |
+
if st.session_state['Contest'] is not None and projections is not None:
|
72 |
+
st.subheader("Name Matching Analysis")
|
73 |
+
# Initialize projections_df in session state if it doesn't exist
|
74 |
+
if 'projections_df' not in st.session_state:
|
75 |
+
st.session_state['projections_df'] = projections.copy()
|
76 |
+
st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
|
77 |
|
78 |
+
# Update projections_df with any new matches
|
79 |
+
st.session_state['projections_df'] = find_name_mismatches(st.session_state['Contest'], st.session_state['projections_df'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
with tab2:
|
82 |
if st.button('Clear data', key='reset3'):
|
global_func/find_name_mismatches.py
CHANGED
@@ -4,12 +4,11 @@ import pandas as pd
|
|
4 |
import time
|
5 |
from fuzzywuzzy import process
|
6 |
|
7 |
-
def find_name_mismatches(
|
8 |
# Create a copy of the projections dataframe to avoid modifying the original
|
9 |
projections_df = projections_df.copy()
|
10 |
|
11 |
-
|
12 |
-
portfolio_df.columns = range(col_count)
|
13 |
|
14 |
if 'player_names' not in projections_df.columns:
|
15 |
st.error("No 'player_names' column found in projections file")
|
@@ -17,8 +16,8 @@ def find_name_mismatches(portfolio_df, projections_df):
|
|
17 |
|
18 |
# Get unique player names from portfolio and projections
|
19 |
portfolio_players = set()
|
20 |
-
for col in
|
21 |
-
portfolio_players.update(
|
22 |
projection_players = set(projections_df['player_names'].unique())
|
23 |
projection_players_list = list(projection_players)
|
24 |
|
|
|
4 |
import time
|
5 |
from fuzzywuzzy import process
|
6 |
|
7 |
+
def find_name_mismatches(contest_df, projections_df):
|
8 |
# Create a copy of the projections dataframe to avoid modifying the original
|
9 |
projections_df = projections_df.copy()
|
10 |
|
11 |
+
name_columns = [col for col in contest_df.columns if not col in ['BaseName', 'EntryCount']]
|
|
|
12 |
|
13 |
if 'player_names' not in projections_df.columns:
|
14 |
st.error("No 'player_names' column found in projections file")
|
|
|
16 |
|
17 |
# Get unique player names from portfolio and projections
|
18 |
portfolio_players = set()
|
19 |
+
for col in name_columns:
|
20 |
+
portfolio_players.update(contest_df[col].unique())
|
21 |
projection_players = set(projections_df['player_names'].unique())
|
22 |
projection_players_list = list(projection_players)
|
23 |
|
global_func/load_file.py
CHANGED
@@ -45,13 +45,12 @@ def load_file(upload):
|
|
45 |
df[i] = df['Lineup'].str.split(',').str[i].str.strip()
|
46 |
# Remove position indicators from the end of each entry
|
47 |
df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
|
48 |
-
position_dict = dict(zip(df['Player'], df['Pos']))
|
49 |
ownership_dict = dict(zip(df['Player'], df['Own']))
|
50 |
cleaned_df = df.drop(columns=['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Pos', 'Own', 'FPTS'])
|
51 |
entry_list = list(set(df['BaseName']))
|
52 |
entry_list.sort()
|
53 |
|
54 |
-
return cleaned_df,
|
55 |
except Exception as e:
|
56 |
st.error(f'Error loading file: {str(e)}')
|
57 |
return None
|
|
|
45 |
df[i] = df['Lineup'].str.split(',').str[i].str.strip()
|
46 |
# Remove position indicators from the end of each entry
|
47 |
df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
|
|
|
48 |
ownership_dict = dict(zip(df['Player'], df['Own']))
|
49 |
cleaned_df = df.drop(columns=['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Pos', 'Own', 'FPTS'])
|
50 |
entry_list = list(set(df['BaseName']))
|
51 |
entry_list.sort()
|
52 |
|
53 |
+
return cleaned_df, ownership_dict, entry_list
|
54 |
except Exception as e:
|
55 |
st.error(f'Error loading file: {str(e)}')
|
56 |
return None
|