James McCool
Remove unnamed columns from uploaded DataFrame in load_file.py: enhance data cleanliness by dropping columns with 'Unnamed' in their names before further processing.
cadc1d7
raw
history blame
1.26 kB
import streamlit as st
import numpy as np
import pandas as pd
import time
from fuzzywuzzy import process
import re
## import global functions
from global_func.clean_player_name import clean_player_name
def load_file(upload):
    """Read an uploaded CSV or Excel file into a raw and a name-cleaned DataFrame.

    Args:
        upload: File-like object exposing a ``name`` attribute (presumably the
            object returned by a Streamlit file uploader -- confirm against the
            caller), or ``None`` when nothing has been uploaded.

    Returns:
        A 2-tuple ``(export_df, df)``:

        * ``export_df`` -- the parsed data with every column whose label
          contains ``"Unnamed"`` removed, otherwise untouched.
        * ``df`` -- the same data, with ``clean_player_name`` applied to each
          string value found in object-dtype columns.

        ``(None, None)`` is returned on every failure path (no upload,
        unsupported extension, or any error while reading/cleaning) so callers
        can always unpack the result. Errors are reported through ``st.error``
        rather than raised.
    """
    if upload is None:
        return None, None

    try:
        # Strip " (<digits>)" fragments from the name before inspecting the
        # extension (presumably duplicate-download suffixes such as
        # "file.csv (1)" -- confirm).
        clean_name = re.sub(r' \(\d+\)', '', upload.name)
        # Debug output kept from the original implementation.
        print(clean_name)
        print(upload.name)

        # Compare extensions case-insensitively so "DATA.CSV" is accepted.
        lowered_name = clean_name.lower()
        if lowered_name.endswith('.csv'):
            df = pd.read_csv(upload)
        elif lowered_name.endswith(('.xls', '.xlsx')):
            df = pd.read_excel(upload)
        else:
            st.error('Please upload either a CSV or Excel file')
            return None, None

        # Column labels are not guaranteed to be strings (e.g. numeric Excel
        # headers), so stringify before the substring test.
        unnamed_cols = [col for col in df.columns if "Unnamed" in str(col)]
        if unnamed_cols:
            df = df.drop(columns=unnamed_cols)

        # Snapshot taken before name cleaning; this is the "export" view.
        export_df = df.copy()

        for col in df.columns:
            if df[col].dtype == 'object':
                # Only string cells are cleaned; NaN and other values pass through.
                df[col] = df[col].apply(
                    lambda x: clean_player_name(x) if isinstance(x, str) else x
                )

        return export_df, df
    except Exception as e:
        # Top-level UI boundary: surface the problem to the user and return the
        # same shape as the other failure paths.
        st.error(f'Error loading file: {str(e)}')
        return None, None