James McCool committed on
Commit
b41a4e7
·
1 Parent(s): 2fb81af

Add character normalization for data exports in app.py: implement a function to convert accented characters to ASCII equivalents, ensuring cleaner data output. Update CSV export functions to apply normalization, enhancing data consistency and accessibility.

Browse files
Files changed (1) hide show
  1. app.py +26 -3
app.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
4
  import streamlit as st
5
  import gspread
6
  import pymongo
 
7
 
8
  st.set_page_config(layout="wide")
9
 
@@ -332,18 +333,40 @@ def init_FD_SD_lineups(slate_desig: str, league: str):
332
 
333
  return DK_seed
334
 
 
 
 
 
 
 
 
 
 
 
335
  def convert_df_to_csv(df):
336
- return df.to_csv().encode('utf-8')
 
 
 
 
337
 
338
  @st.cache_data
339
  def convert_df(array):
340
  array = pd.DataFrame(array, columns=column_names)
341
- return array.to_csv().encode('utf-8')
 
 
 
 
342
 
343
  @st.cache_data
344
  def convert_pm_df(array):
345
  array = pd.DataFrame(array)
346
- return array.to_csv().encode('utf-8')
 
 
 
 
347
 
348
  dk_raw, fd_raw, dk_raw_sec, fd_raw_sec, roo_raw, sd_raw, dk_sd_raw, fd_sd_raw, timestamp = load_overall_stats('NBA')
349
  salary_dict = dict(zip(roo_raw.Player, roo_raw.Salary))
 
4
  import streamlit as st
5
  import gspread
6
  import pymongo
7
+ import unicodedata
8
 
9
  st.set_page_config(layout="wide")
10
 
 
333
 
334
  return DK_seed
335
 
336
+ def normalize_special_characters(text):
337
+ """Convert accented characters to their ASCII equivalents"""
338
+ if pd.isna(text):
339
+ return text
340
+ # Normalize unicode characters to their closest ASCII equivalents
341
+ normalized = unicodedata.normalize('NFKD', str(text))
342
+ # Remove diacritics (accents, umlauts, etc.)
343
+ ascii_text = ''.join(c for c in normalized if not unicodedata.combining(c))
344
+ return ascii_text
345
+
346
  def convert_df_to_csv(df):
347
+ df_clean = df.copy()
348
+ for col in df_clean.columns:
349
+ if df_clean[col].dtype == 'object':
350
+ df_clean[col] = df_clean[col].apply(normalize_special_characters)
351
+ return df_clean.to_csv(index=False).encode('utf-8')
352
 
353
  @st.cache_data
354
  def convert_df(array):
355
  array = pd.DataFrame(array, columns=column_names)
356
+ # Normalize special characters in the dataframe before export
357
+ for col in array.columns:
358
+ if array[col].dtype == 'object':
359
+ array[col] = array[col].apply(normalize_special_characters)
360
+ return array.to_csv(index=False).encode('utf-8')
361
 
362
  @st.cache_data
363
  def convert_pm_df(array):
364
  array = pd.DataFrame(array)
365
+ # Normalize special characters in the dataframe before export
366
+ for col in array.columns:
367
+ if array[col].dtype == 'object':
368
+ array[col] = array[col].apply(normalize_special_characters)
369
+ return array.to_csv(index=False).encode('utf-8')
370
 
371
  dk_raw, fd_raw, dk_raw_sec, fd_raw_sec, roo_raw, sd_raw, dk_sd_raw, fd_sd_raw, timestamp = load_overall_stats('NBA')
372
  salary_dict = dict(zip(roo_raw.Player, roo_raw.Salary))