James McCool committed on
Commit
92f395c
·
1 Parent(s): 5dc55b5

Implement character normalization and enhance data export functionality in Streamlit app. Added a new function to convert accented characters to ASCII, ensuring cleaner data exports. Updated export logic to include options for both IDs and names, with filtering capabilities based on user-defined salary constraints.

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +155 -17
src/streamlit_app.py CHANGED
@@ -3,6 +3,7 @@ import numpy as np
3
  import pandas as pd
4
  import pymongo
5
  import os
 
6
 
7
  st.set_page_config(layout="wide")
8
 
@@ -139,15 +140,46 @@ def init_FD_lineups(type):
139
 
140
  return FD_seed
141
 
 
 
 
 
 
 
 
 
 
 
142
  def convert_df_to_csv(df):
143
- return df.to_csv().encode('utf-8')
 
 
 
 
144
 
145
  @st.cache_data
146
  def convert_df(array):
147
  array = pd.DataFrame(array, columns=column_names)
148
- return array.to_csv().encode('utf-8')
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  roo_data, sd_roo_data, timestamp = init_baselines()
 
 
 
 
151
  hold_display = roo_data
152
  lineup_display = []
153
  check_list = []
@@ -164,6 +196,10 @@ with tab1:
164
  # i.e. clear values from both square and cube
165
  st.cache_data.clear()
166
  roo_data, sd_roo_data, timestamp = init_baselines()
 
 
 
 
167
  dk_lineups = init_DK_lineups('Regular')
168
  fd_lineups = init_FD_lineups('Regular')
169
  hold_display = roo_data
@@ -213,6 +249,10 @@ with tab2:
213
  if st.button("Load/Reset Data", key='reset2'):
214
  st.cache_data.clear()
215
  roo_data, sd_roo_data, timestamp = init_baselines()
 
 
 
 
216
  hold_display = roo_data
217
  dk_lineups = init_DK_lineups('Regular')
218
  fd_lineups = init_FD_lineups('Regular')
@@ -223,6 +263,12 @@ with tab2:
223
  col1, col2, col3, col4, col5 = st.columns(5)
224
  with col1:
225
  site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'))
 
 
 
 
 
 
226
 
227
  with col2:
228
  slate_var1 = st.radio("Which data are you loading?", ('Regular', 'Showdown'))
@@ -281,20 +327,112 @@ with tab2:
281
  salary_min_var = st.number_input("Minimum salary used", min_value = 0, max_value = 60000, value = 59000, step = 100, key = 'salary_min_var')
282
  salary_max_var = st.number_input("Maximum salary used", min_value = 0, max_value = 60000, value = 60000, step = 100, key = 'salary_max_var')
283
 
284
- if st.button("Prepare data export", key='data_export'):
285
- data_export = st.session_state.working_seed.copy()
286
- # if site_var1 == 'Draftkings':
287
- # for col_idx in range(6):
288
- # data_export[:, col_idx] = np.array([id_dict.get(player, player) for player in data_export[:, col_idx]])
289
- # elif site_var1 == 'Fanduel':
290
- # for col_idx in range(6):
291
- # data_export[:, col_idx] = np.array([id_dict.get(player, player) for player in data_export[:, col_idx]])
292
- st.download_button(
293
- label="Export optimals set",
294
- data=convert_df(data_export),
295
- file_name='NBA_optimals_export.csv',
296
- mime='text/csv',
297
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
  if site_var1 == 'Draftkings':
300
  if 'working_seed' in st.session_state:
@@ -353,7 +491,7 @@ with tab2:
353
  st.download_button(
354
  label="Export display optimals",
355
  data=convert_df(export_file),
356
- file_name='NBA_display_optimals.csv',
357
  mime='text/csv',
358
  )
359
 
 
3
  import pandas as pd
4
  import pymongo
5
  import os
6
+ import unicodedata
7
 
8
  st.set_page_config(layout="wide")
9
 
 
140
 
141
  return FD_seed
142
 
143
# Letters with no NFKD decomposition: stripping combining marks alone would
# leave these as non-ASCII, so transliterate them explicitly.
_ASCII_FALLBACKS = str.maketrans({
    'ø': 'o', 'Ø': 'O',
    'đ': 'd', 'Đ': 'D',
    'ł': 'l', 'Ł': 'L',
    'ß': 'ss',
    'æ': 'ae', 'Æ': 'AE',
    'œ': 'oe', 'Œ': 'OE',
    'þ': 'th', 'Þ': 'Th',
    'ð': 'd', 'Ð': 'D',
})

def normalize_special_characters(text):
    """Convert accented characters to their ASCII equivalents.

    Parameters
    ----------
    text : scalar
        Value to normalize; non-strings are coerced with ``str``.

    Returns
    -------
    The input unchanged when it is NA (per ``pd.isna``); otherwise a string
    with diacritics removed and non-decomposable letters (ø, đ, ß, ...)
    transliterated to ASCII.
    """
    if pd.isna(text):
        return text
    # NFKD splits each accented letter into base letter + combining mark(s).
    decomposed = unicodedata.normalize('NFKD', str(text))
    # Drop the combining marks (accents, umlauts, etc.), keep base letters.
    stripped = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Handle letters NFKD cannot decompose so the result is truly ASCII.
    return stripped.translate(_ASCII_FALLBACKS)
152
+
153
def convert_df_to_csv(df):
    """Encode *df* as UTF-8 CSV bytes, ASCII-normalizing text columns first.

    Works on a copy so the caller's DataFrame is never mutated; only
    ``object``-dtype columns are passed through
    ``normalize_special_characters``.
    """
    cleaned = df.copy()
    text_columns = [name for name in cleaned.columns
                    if cleaned[name].dtype == 'object']
    for name in text_columns:
        cleaned[name] = cleaned[name].apply(normalize_special_characters)
    return cleaned.to_csv(index=False).encode('utf-8')
159
 
160
@st.cache_data
def convert_df(array):
    """Build a DataFrame from *array* (columns from the module-level
    ``column_names``) and return it as UTF-8 CSV bytes.

    ``object``-dtype columns are ASCII-normalized via
    ``normalize_special_characters`` before export.
    """
    frame = pd.DataFrame(array, columns=column_names)
    # Normalize special characters in the dataframe before export.
    text_columns = [name for name in frame.columns
                    if frame[name].dtype == 'object']
    for name in text_columns:
        frame[name] = frame[name].apply(normalize_special_characters)
    return frame.to_csv(index=False).encode('utf-8')
168
+
169
@st.cache_data
def convert_pm_df(array):
    """Return *array* as UTF-8 CSV bytes with its existing column labels.

    Unlike ``convert_df``, no column names are imposed; ``object``-dtype
    columns are ASCII-normalized via ``normalize_special_characters``
    before export.
    """
    frame = pd.DataFrame(array)
    # Normalize special characters in the dataframe before export.
    text_columns = [name for name in frame.columns
                    if frame[name].dtype == 'object']
    for name in text_columns:
        frame[name] = frame[name].apply(normalize_special_characters)
    return frame.to_csv(index=False).encode('utf-8')
177
 
178
  roo_data, sd_roo_data, timestamp = init_baselines()
179
+ dk_id_dict = dict(zip(roo_data['Player'], roo_data['player_id']))
180
+ dk_id_dict_sd = dict(zip(sd_roo_data['Player'], sd_roo_data['player_id']))
181
+ fd_id_dict = dict(zip(roo_data['Player'], roo_data['player_id']))
182
+ fd_id_dict_sd = dict(zip(sd_roo_data['Player'], sd_roo_data['player_id']))
183
  hold_display = roo_data
184
  lineup_display = []
185
  check_list = []
 
196
  # i.e. clear values from both square and cube
197
  st.cache_data.clear()
198
  roo_data, sd_roo_data, timestamp = init_baselines()
199
+ dk_id_dict = dict(zip(roo_data['Player'], roo_data['player_id']))
200
+ dk_id_dict_sd = dict(zip(sd_roo_data['Player'], sd_roo_data['player_id']))
201
+ fd_id_dict = dict(zip(roo_data['Player'], roo_data['player_id']))
202
+ fd_id_dict_sd = dict(zip(sd_roo_data['Player'], sd_roo_data['player_id']))
203
  dk_lineups = init_DK_lineups('Regular')
204
  fd_lineups = init_FD_lineups('Regular')
205
  hold_display = roo_data
 
249
  if st.button("Load/Reset Data", key='reset2'):
250
  st.cache_data.clear()
251
  roo_data, sd_roo_data, timestamp = init_baselines()
252
+ dk_id_dict = dict(zip(roo_data['Player'], roo_data['player_id']))
253
+ dk_id_dict_sd = dict(zip(sd_roo_data['Player'], sd_roo_data['player_id']))
254
+ fd_id_dict = dict(zip(roo_data['Player'], roo_data['player_id']))
255
+ fd_id_dict_sd = dict(zip(sd_roo_data['Player'], sd_roo_data['player_id']))
256
  hold_display = roo_data
257
  dk_lineups = init_DK_lineups('Regular')
258
  fd_lineups = init_FD_lineups('Regular')
 
263
  col1, col2, col3, col4, col5 = st.columns(5)
264
  with col1:
265
  site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'))
266
+ if site_var1 == 'Draftkings':
267
+ id_dict = dk_id_dict.copy()
268
+ id_dict_sd = dk_id_dict_sd.copy()
269
+ elif site_var1 == 'Fanduel':
270
+ id_dict = fd_id_dict.copy()
271
+ id_dict_sd = fd_id_dict_sd.copy()
272
 
273
  with col2:
274
  slate_var1 = st.radio("Which data are you loading?", ('Regular', 'Showdown'))
 
327
  salary_min_var = st.number_input("Minimum salary used", min_value = 0, max_value = 60000, value = 59000, step = 100, key = 'salary_min_var')
328
  salary_max_var = st.number_input("Maximum salary used", min_value = 0, max_value = 60000, value = 60000, step = 100, key = 'salary_max_var')
329
 
330
+ reg_dl_col, filtered_dl_col, blank_dl_col = st.columns([2, 2, 6])
331
+ with reg_dl_col:
332
+ if st.button("Prepare full data export", key='data_export'):
333
+ name_export = pd.DataFrame(st.session_state.working_seed.copy(), columns=column_names)
334
+ data_export = pd.DataFrame(st.session_state.working_seed.copy(), columns=column_names)
335
+ if site_var1 == 'Draftkings':
336
+ if slate_var1 == 'Regular':
337
+ map_columns = ['FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'FLEX6']
338
+ elif slate_var1 == 'Showdown':
339
+ map_columns = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5']
340
+ elif site_var1 == 'Fanduel':
341
+ if slate_var1 == 'Regular':
342
+ map_columns = ['FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'FLEX6']
343
+ elif slate_var1 == 'Showdown':
344
+ map_columns = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4']
345
+ for col_idx in map_columns:
346
+ if slate_var1 == 'Regular':
347
+ data_export[col_idx] = data_export[col_idx].map(id_dict)
348
+ elif slate_var1 == 'Showdown':
349
+ data_export[col_idx] = data_export[col_idx].map(id_dict_sd)
350
+
351
+ pm_name_export = name_export.drop(columns=['salary', 'proj', 'Own'], axis=1)
352
+ pm_data_export = data_export.drop(columns=['salary', 'proj', 'Own'], axis=1)
353
+ reg_opt_col, pm_opt_col = st.columns(2)
354
+
355
+ with reg_opt_col:
356
+ st.download_button(
357
+ label="Export optimals set (IDs)",
358
+ data=convert_df(data_export),
359
+ file_name='PGA_optimals_export.csv',
360
+ mime='text/csv',
361
+ )
362
+ st.download_button(
363
+ label="Export optimals set (Names)",
364
+ data=convert_df(name_export),
365
+ file_name='PGA_optimals_export.csv',
366
+ mime='text/csv',
367
+ )
368
+ with pm_opt_col:
369
+ st.download_button(
370
+ label="Portfolio Manager Export (IDs)",
371
+ data=convert_pm_df(pm_data_export),
372
+ file_name='PGA_optimals_export.csv',
373
+ mime='text/csv',
374
+ )
375
+ st.download_button(
376
+ label="Portfolio Manager Export (Names)",
377
+ data=convert_pm_df(pm_name_export),
378
+ file_name='PGA_optimals_export.csv',
379
+ mime='text/csv',
380
+ )
381
+ with filtered_dl_col:
382
+ if st.button("Prepare full data export (Filtered)", key='data_export_filtered'):
383
+ name_export = pd.DataFrame(st.session_state.working_seed.copy(), columns=column_names)
384
+ data_export = pd.DataFrame(st.session_state.working_seed.copy(), columns=column_names)
385
+ if site_var1 == 'Draftkings':
386
+ if slate_var1 == 'Regular':
387
+ map_columns = ['FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'FLEX6']
388
+ elif slate_var1 == 'Showdown':
389
+ map_columns = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5']
390
+ elif site_var1 == 'Fanduel':
391
+ if slate_var1 == 'Regular':
392
+ map_columns = ['FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'FLEX6']
393
+ elif slate_var1 == 'Showdown':
394
+ map_columns = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4']
395
+ for col_idx in map_columns:
396
+ if slate_var1 == 'Regular':
397
+ data_export[col_idx] = data_export[col_idx].map(id_dict)
398
+ elif slate_var1 == 'Showdown':
399
+ data_export[col_idx] = data_export[col_idx].map(id_dict_sd)
400
+ data_export = data_export[data_export['salary'] >= salary_min_var]
401
+ data_export = data_export[data_export['salary'] <= salary_max_var]
402
+
403
+ name_export = name_export[name_export['salary'] >= salary_min_var]
404
+ name_export = name_export[name_export['salary'] <= salary_max_var]
405
+
406
+ pm_name_export = name_export.drop(columns=['salary', 'proj', 'Own'], axis=1)
407
+ pm_data_export = data_export.drop(columns=['salary', 'proj', 'Own'], axis=1)
408
+
409
+ reg_opt_col, pm_opt_col = st.columns(2)
410
+ with reg_opt_col:
411
+ st.download_button(
412
+ label="Export optimals set (IDs)",
413
+ data=convert_df(data_export),
414
+ file_name='PGA_optimals_export.csv',
415
+ mime='text/csv',
416
+ )
417
+ st.download_button(
418
+ label="Export optimals set (Names)",
419
+ data=convert_df(name_export),
420
+ file_name='PGA_optimals_export.csv',
421
+ mime='text/csv',
422
+ )
423
+ with pm_opt_col:
424
+ st.download_button(
425
+ label="Portfolio Manager Export (IDs)",
426
+ data=convert_pm_df(pm_data_export),
427
+ file_name='PGA_optimals_export.csv',
428
+ mime='text/csv',
429
+ )
430
+ st.download_button(
431
+ label="Portfolio Manager Export (Names)",
432
+ data=convert_pm_df(pm_name_export),
433
+ file_name='PGA_optimals_export.csv',
434
+ mime='text/csv',
435
+ )
436
 
437
  if site_var1 == 'Draftkings':
438
  if 'working_seed' in st.session_state:
 
491
  st.download_button(
492
  label="Export display optimals",
493
  data=convert_df(export_file),
494
+ file_name='PGA_display_optimals.csv',
495
  mime='text/csv',
496
  )
497