Spaces:
Running
Running
James McCool
committed on
Commit
·
b41a4e7
1
Parent(s):
2fb81af
Add character normalization for data exports in app.py: implement a function to convert accented characters to ASCII equivalents, ensuring cleaner data output. Update CSV export functions to apply normalization, enhancing data consistency and accessibility.
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
|
|
4 |
import streamlit as st
|
5 |
import gspread
|
6 |
import pymongo
|
|
|
7 |
|
8 |
st.set_page_config(layout="wide")
|
9 |
|
@@ -332,18 +333,40 @@ def init_FD_SD_lineups(slate_desig: str, league: str):
|
|
332 |
|
333 |
return DK_seed
|
334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
def convert_df_to_csv(df):
|
336 |
-
|
|
|
|
|
|
|
|
|
337 |
|
338 |
@st.cache_data
|
339 |
def convert_df(array):
|
340 |
array = pd.DataFrame(array, columns=column_names)
|
341 |
-
|
|
|
|
|
|
|
|
|
342 |
|
343 |
@st.cache_data
|
344 |
def convert_pm_df(array):
|
345 |
array = pd.DataFrame(array)
|
346 |
-
|
|
|
|
|
|
|
|
|
347 |
|
348 |
dk_raw, fd_raw, dk_raw_sec, fd_raw_sec, roo_raw, sd_raw, dk_sd_raw, fd_sd_raw, timestamp = load_overall_stats('NBA')
|
349 |
salary_dict = dict(zip(roo_raw.Player, roo_raw.Salary))
|
|
|
4 |
import streamlit as st
|
5 |
import gspread
|
6 |
import pymongo
|
7 |
+
import unicodedata
|
8 |
|
9 |
st.set_page_config(layout="wide")
|
10 |
|
|
|
333 |
|
334 |
return DK_seed
|
335 |
|
336 |
+
# Letters that have no Unicode decomposition and therefore pass through NFKD
# unchanged; map them to their customary ASCII transliterations.
_ASCII_FALLBACKS = str.maketrans({
    'ø': 'o', 'Ø': 'O',
    'đ': 'd', 'Đ': 'D',
    'ł': 'l', 'Ł': 'L',
    'ß': 'ss',
    'æ': 'ae', 'Æ': 'AE',
    'œ': 'oe', 'Œ': 'OE',
})


def normalize_special_characters(text):
    """Convert accented characters to their closest ASCII equivalents.

    Args:
        text: A single cell value. Scalar missing values (``None``, ``NaN``,
            ``pd.NA``, ``NaT``) are returned unchanged. Any other value is
            converted with ``str()`` before normalization.

    Returns:
        The missing value itself, or a ``str`` with diacritics removed and a
        small set of non-decomposable letters (e.g. ``ø``, ``ł``, ``ß``)
        transliterated. Characters outside those cases (for example
        non-Latin scripts) are left as they are.
    """
    # pd.isna() returns an array for list-like input, which cannot be used in
    # a boolean context; only ask the missing-value question for scalars.
    if pd.api.types.is_scalar(text) and pd.isna(text):
        return text

    # NFKD splits an accented letter into its base letter followed by
    # combining marks, so dropping the combining marks removes the accents.
    decomposed = unicodedata.normalize('NFKD', str(text))
    without_marks = ''.join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )

    # Handle the letters NFKD cannot decompose.
    return without_marks.translate(_ASCII_FALLBACKS)
|
345 |
+
|
346 |
def convert_df_to_csv(df):
    """Return ``df`` as UTF-8 encoded CSV bytes with accents stripped from text.

    The input frame is copied first, so the caller's DataFrame is not
    modified. Every text column is passed value-by-value through
    ``normalize_special_characters`` before serialization.

    Args:
        df: The DataFrame to export.

    Returns:
        ``bytes`` containing the CSV (no index column), encoded as UTF-8.
    """
    df_clean = df.copy()
    for col in df_clean.columns:
        # is_string_dtype() on the dtype is true for the legacy ``object``
        # dtype and for pandas' dedicated string dtypes; a plain
        # ``== 'object'`` comparison would skip the latter.
        if pd.api.types.is_string_dtype(df_clean[col].dtype):
            df_clean[col] = df_clean[col].apply(normalize_special_characters)
    return df_clean.to_csv(index=False).encode('utf-8')
|
352 |
|
353 |
@st.cache_data
def convert_df(array):
    """Build a DataFrame from ``array`` and return it as UTF-8 CSV bytes.

    The columns are labelled with the module-level ``column_names`` (defined
    outside this function). Text columns are passed value-by-value through
    ``normalize_special_characters`` before serialization.

    Args:
        array: Data accepted by ``pd.DataFrame`` whose width matches
            ``column_names``.

    Returns:
        ``bytes`` containing the CSV (no index column), encoded as UTF-8.
    """
    array = pd.DataFrame(array, columns=column_names)
    # Normalize special characters in the dataframe before export.
    for col in array.columns:
        # is_string_dtype() on the dtype is true for the legacy ``object``
        # dtype and for pandas' dedicated string dtypes; a plain
        # ``== 'object'`` comparison would skip the latter.
        if pd.api.types.is_string_dtype(array[col].dtype):
            array[col] = array[col].apply(normalize_special_characters)
    return array.to_csv(index=False).encode('utf-8')
|
361 |
|
362 |
@st.cache_data
def convert_pm_df(array):
    """Build a DataFrame from ``array`` and return it as UTF-8 CSV bytes.

    Unlike ``convert_df``, no column labels are supplied; the DataFrame keeps
    whatever labels ``pd.DataFrame(array)`` assigns. Text columns are passed
    value-by-value through ``normalize_special_characters`` before
    serialization.

    Args:
        array: Data accepted by ``pd.DataFrame``.

    Returns:
        ``bytes`` containing the CSV (no index column), encoded as UTF-8.
    """
    array = pd.DataFrame(array)
    # Normalize special characters in the dataframe before export.
    for col in array.columns:
        # is_string_dtype() on the dtype is true for the legacy ``object``
        # dtype and for pandas' dedicated string dtypes; a plain
        # ``== 'object'`` comparison would skip the latter.
        if pd.api.types.is_string_dtype(array[col].dtype):
            array[col] = array[col].apply(normalize_special_characters)
    return array.to_csv(index=False).encode('utf-8')
|
370 |
|
371 |
dk_raw, fd_raw, dk_raw_sec, fd_raw_sec, roo_raw, sd_raw, dk_sd_raw, fd_sd_raw, timestamp = load_overall_stats('NBA')
|
372 |
salary_dict = dict(zip(roo_raw.Player, roo_raw.Salary))
|