James McCool
committed on
Commit
·
082eda6
1
Parent(s):
b4fbe69
Enhance name matching functionality in app.py and find_name_mismatches.py
Browse files
- Updated the find_name_mismatches function to accept ownership and actual dictionaries, allowing for more comprehensive name matching.
- Modified app.py to handle the additional outputs from the updated function, ensuring proper session state management for ownership and actual data.
- Improved data integrity by copying ownership and actual dictionaries before modifications, maintaining original data consistency.
- app.py +1 -1
- global_func/find_name_mismatches.py +21 -2
app.py
CHANGED
@@ -79,7 +79,7 @@ with tab1:
|
|
79 |
st.session_state['projections_df'] = projections.copy()
|
80 |
st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
|
81 |
# Run name matching only once when first loading the files
|
82 |
-
st.session_state['Contest'], st.session_state['projections_df'] = find_name_mismatches(st.session_state['Contest'], st.session_state['projections_df'])
|
83 |
|
84 |
with tab2:
|
85 |
if st.button('Clear data', key='reset3'):
|
|
|
79 |
st.session_state['projections_df'] = projections.copy()
|
80 |
st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
|
81 |
# Run name matching only once when first loading the files
|
82 |
+
st.session_state['Contest'], st.session_state['projections_df'], st.session_state['ownership_dict'], st.session_state['actual_dict'] = find_name_mismatches(st.session_state['Contest'], st.session_state['projections_df'], st.session_state['ownership_dict'], st.session_state['actual_dict'])
|
83 |
|
84 |
with tab2:
|
85 |
if st.button('Clear data', key='reset3'):
|
global_func/find_name_mismatches.py
CHANGED
@@ -4,10 +4,12 @@ import pandas as pd
|
|
4 |
import time
|
5 |
from fuzzywuzzy import process
|
6 |
|
7 |
-
def find_name_mismatches(contest_df, projections_df):
|
8 |
# Create a copy of the projections dataframe to avoid modifying the original
|
9 |
projections_df = projections_df.copy()
|
10 |
contest_df = contest_df.copy()
|
|
|
|
|
11 |
|
12 |
name_columns = [col for col in contest_df.columns if not col in ['BaseName', 'EntryCount']]
|
13 |
|
@@ -79,12 +81,23 @@ def find_name_mismatches(contest_df, projections_df):
|
|
79 |
for col in name_columns:
|
80 |
contest_df[col] = contest_df[col].replace(contest_name, projection_name)
|
81 |
|
|
|
|
|
|
|
|
|
|
|
82 |
# Process manual selections
|
83 |
for projection_name, selection in selections.items():
|
84 |
if selection != "None of these":
|
85 |
selected_name = selection.split(" (")[0]
|
86 |
for col in name_columns:
|
87 |
contest_df[col] = contest_df[col].replace(selected_name, projection_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
st.success(f"Replaced '{selected_name}' with '{projection_name}'")
|
89 |
|
90 |
st.success("All changes applied successfully!")
|
@@ -98,4 +111,10 @@ def find_name_mismatches(contest_df, projections_df):
|
|
98 |
for projection_name, contest_name in auto_matches.items():
|
99 |
for col in name_columns:
|
100 |
contest_df[col] = contest_df[col].replace(contest_name, projection_name)
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import time
|
5 |
from fuzzywuzzy import process
|
6 |
|
7 |
+
def find_name_mismatches(contest_df, projections_df, ownership_dict, fpts_dict):
|
8 |
# Create a copy of the projections dataframe to avoid modifying the original
|
9 |
projections_df = projections_df.copy()
|
10 |
contest_df = contest_df.copy()
|
11 |
+
ownership_dict = ownership_dict.copy()
|
12 |
+
fpts_dict = fpts_dict.copy()
|
13 |
|
14 |
name_columns = [col for col in contest_df.columns if not col in ['BaseName', 'EntryCount']]
|
15 |
|
|
|
81 |
for col in name_columns:
|
82 |
contest_df[col] = contest_df[col].replace(contest_name, projection_name)
|
83 |
|
84 |
+
if contest_name in ownership_dict:
|
85 |
+
ownership_dict[projection_name] = ownership_dict.pop(contest_name)
|
86 |
+
if contest_name in fpts_dict:
|
87 |
+
fpts_dict[projection_name] = fpts_dict.pop(contest_name)
|
88 |
+
|
89 |
# Process manual selections
|
90 |
for projection_name, selection in selections.items():
|
91 |
if selection != "None of these":
|
92 |
selected_name = selection.split(" (")[0]
|
93 |
for col in name_columns:
|
94 |
contest_df[col] = contest_df[col].replace(selected_name, projection_name)
|
95 |
+
|
96 |
+
if selected_name in ownership_dict:
|
97 |
+
ownership_dict[projection_name] = ownership_dict.pop(selected_name)
|
98 |
+
if selected_name in fpts_dict:
|
99 |
+
fpts_dict[projection_name] = fpts_dict.pop(selected_name)
|
100 |
+
|
101 |
st.success(f"Replaced '{selected_name}' with '{projection_name}'")
|
102 |
|
103 |
st.success("All changes applied successfully!")
|
|
|
111 |
for projection_name, contest_name in auto_matches.items():
|
112 |
for col in name_columns:
|
113 |
contest_df[col] = contest_df[col].replace(contest_name, projection_name)
|
114 |
+
|
115 |
+
if contest_name in ownership_dict:
|
116 |
+
ownership_dict[projection_name] = ownership_dict.pop(contest_name)
|
117 |
+
if contest_name in fpts_dict:
|
118 |
+
fpts_dict[projection_name] = fpts_dict.pop(contest_name)
|
119 |
+
|
120 |
+
return contest_df, projections_df, ownership_dict, fpts_dict
|