James McCool committed on
Commit
0841c51
·
1 Parent(s): 007d3db

Add MongoDB integration and contest data retrieval in app.py and grab_contest_data.py

Browse files

- Implemented MongoDB connection and data retrieval functions in app.py to fetch contest names and related information for MLB.
- Created a new module, grab_contest_data.py, to handle fetching and formatting contest data from an external API, enhancing data management capabilities.
- Updated the Streamlit interface to allow users to select between manual file uploads and database searches for contest data.

Files changed (2) hide show
  1. app.py +34 -3
  2. global_func/grab_contest_data.py +72 -0
app.py CHANGED
@@ -5,6 +5,30 @@ import pandas as pd
5
  import time
6
  from fuzzywuzzy import process
7
  from collections import Counter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  ## import global functions
10
  from global_func.load_contest_file import load_contest_file
@@ -14,6 +38,7 @@ from global_func.create_player_exposures import create_player_exposures
14
  from global_func.create_stack_exposures import create_stack_exposures
15
  from global_func.create_stack_size_exposures import create_stack_size_exposures
16
  from global_func.create_general_exposures import create_general_exposures
 
17
 
18
  player_exposure_format = {'Exposure Overall': '{:.2%}', 'Exposure Top 1%': '{:.2%}', 'Exposure Top 5%': '{:.2%}', 'Exposure Top 10%': '{:.2%}', 'Exposure Top 20%': '{:.2%}'}
19
 
@@ -21,10 +46,12 @@ tab1, tab2 = st.tabs(["Data Load", "Contest Analysis"])
21
  with tab1:
22
  if st.button('Clear data', key='reset1'):
23
  st.session_state.clear()
24
- col1, col2 = st.columns(2)
25
  with col1:
26
- sport_select = st.selectbox("Select Sport", ['MLB', 'NBA', 'NFL'])
27
  with col2:
 
 
28
  type_var = st.selectbox("Select Game Type", ['Classic', 'Showdown'])
29
  # Add file uploaders to your app
30
  col1, col2 = st.columns(2)
@@ -32,7 +59,11 @@ with tab1:
32
  with col1:
33
  st.subheader("Contest File")
34
  st.info("Go ahead and upload a Contest file here. Only include player columns and an optional 'Stack' column if you are playing MLB.")
35
- Contest_file = st.file_uploader("Upload Contest File (CSV or Excel)", type=['csv', 'xlsx', 'xls'])
 
 
 
 
36
  if 'Contest' in st.session_state:
37
  del st.session_state['Contest']
38
 
 
5
  import time
6
  from fuzzywuzzy import process
7
  from collections import Counter
8
+ from pymongo.mongo_client import MongoClient
9
+ from pymongo.server_api import ServerApi
10
+
11
def init_conn():
    """Create the MongoDB client and return the ``Contest_Information`` database.

    The connection string is read from the Streamlit secret ``mongo_uri``.
    The client is created with retryable writes enabled and a server
    selection timeout of 500000 ms.
    """
    mongo_client = MongoClient(
        st.secrets['mongo_uri'],
        retryWrites=True,
        serverSelectionTimeoutMS=500000,
    )
    return mongo_client['Contest_Information']
18
+
19
def grab_contest_names(db, sport):
    """Load the contest names for ``sport`` plus lookups keyed by contest name.

    Reads every document from the ``{sport}_contest_info`` collection of
    ``db`` and expects each document to carry ``Contest Name``,
    ``Contest ID`` and ``Date`` fields.

    Args:
        db: Mapping-like database handle; ``db[name]`` must return an object
            with a ``find()`` method that yields documents (dicts).
        sport: Sport prefix used to build the collection name, e.g. ``'MLB'``.

    Returns:
        A tuple ``(contest_names, contest_id_map, contest_date_map)``:
        a pandas Series of contest names, a dict of name -> contest ID and
        a dict of name -> date. When the collection holds no documents the
        Series is empty and both dicts are empty.
    """
    documents = list(db[f'{sport}_contest_info'].find())

    # An empty collection produces a DataFrame without any columns, so the
    # column lookups below would raise KeyError; hand back empty results.
    if not documents:
        return pd.Series([], dtype=object, name='Contest Name'), {}, {}

    # The Mongo-generated '_id' is not needed; tolerate its absence.
    curr_info = pd.DataFrame(documents).drop(columns='_id', errors='ignore')
    contest_names = curr_info['Contest Name']
    contest_id_map = dict(zip(contest_names, curr_info['Contest ID']))
    contest_date_map = dict(zip(contest_names, curr_info['Date']))

    return contest_names, contest_id_map, contest_date_map
29
+
30
# Module-level setup: open the database handle, then load the MLB contest
# names together with their name -> ID and name -> date lookup tables.
db = init_conn()
(contest_names,
 contest_id_map,
 contest_date_map) = grab_contest_names(db, 'MLB')
32
 
33
  ## import global functions
34
  from global_func.load_contest_file import load_contest_file
 
38
  from global_func.create_stack_exposures import create_stack_exposures
39
  from global_func.create_stack_size_exposures import create_stack_size_exposures
40
  from global_func.create_general_exposures import create_general_exposures
41
+ from global_func.grab_contest_data import grab_contest_data
42
 
43
  player_exposure_format = {'Exposure Overall': '{:.2%}', 'Exposure Top 1%': '{:.2%}', 'Exposure Top 5%': '{:.2%}', 'Exposure Top 10%': '{:.2%}', 'Exposure Top 20%': '{:.2%}'}
44
 
 
46
  with tab1:
47
  if st.button('Clear data', key='reset1'):
48
  st.session_state.clear()
49
# Three selectors side by side: data source, sport, and game type.
col1, col2, col3 = st.columns(3)
with col1:
    parse_type = st.selectbox("Manual upload or DB search?", ['Manual', 'DB Search'])
with col2:
    # This must be the sport picker. It had been replaced by a second
    # "Select Game Type" selectbox, which left sport_select holding a game
    # type and duplicated the type_var widget below (same label, same
    # options, no key).
    sport_select = st.selectbox("Select Sport", ['MLB', 'NBA', 'NFL'])
with col3:
    type_var = st.selectbox("Select Game Type", ['Classic', 'Showdown'])
56
  # Add file uploaders to your app
57
  col1, col2 = st.columns(2)
 
59
  with col1:
60
  st.subheader("Contest File")
61
  st.info("Go ahead and upload a Contest file here. Only include player columns and an optional 'Stack' column if you are playing MLB.")
62
+ if parse_type == 'DB Search':
63
+ contest_name_var = st.selectbox("Select Contest to load", contest_names)
64
+ Contest_file = grab_contest_data('MLB', contest_name_var, contest_id_map, contest_date_map)
65
+ elif parse_type == 'Manual':
66
+ Contest_file = st.file_uploader("Upload Contest File (CSV or Excel)", type=['csv', 'xlsx', 'xls'])
67
  if 'Contest' in st.session_state:
68
  del st.session_state['Contest']
69
 
global_func/grab_contest_data.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import requests
3
+
4
def grab_contest_data(sport, contest_name, contest_id_map, contest_date_map):
    """Download one contest's lineups and player data and write them to a CSV.

    Two JSON documents are fetched from the contest's CloudFront folder
    (``data/`` for players, ``lineups/`` for entries). Lineups are ranked by
    points, each lineup hash is expanded into a position-labelled string of
    player names, and the result is merged side by side (by row index) with
    the player table sorted by ownership. The combined table is written to
    ``{contest_name}.csv`` in the current working directory.

    Args:
        sport: Sport name; lower-cased into the URL path.
        contest_name: Key into both maps; also used as the CSV file name.
        contest_id_map: Mapping of contest name -> contest ID.
        contest_date_map: Mapping of contest name -> contest date.

    Returns:
        Whatever ``DataFrame.to_csv`` returns for a path target, i.e. ``None``.
        NOTE(review): the caller assigns this result to ``Contest_file``;
        confirm whether it should receive the file path or the table instead.

    Raises:
        KeyError: If ``contest_name`` is missing from either map, or the
            JSON payloads lack the expected keys.
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
    """
    request_timeout = 30  # seconds; avoids hanging forever on a stalled request

    contest_date = contest_date_map[contest_name]
    contest_id = contest_id_map[contest_name]

    raw_url = f'https://dh5nxc6yx3kwy.cloudfront.net/contests/{sport.lower()}/{contest_date}/{contest_id}/'
    data_url = raw_url + 'data/'
    lineups_url = raw_url + 'lineups/'

    def fetch_json(url):
        """Fetch ``url`` and decode its JSON body, failing on HTTP errors."""
        response = requests.get(url, timeout=request_timeout)
        response.raise_for_status()
        return response.json()

    def format_lineup_string(lineup_hash, positions, names_by_id):
        """Turn a colon-separated ID hash into 'POS name POS name ...' text."""
        # Remove the leading colon and split by the remaining colons
        player_ids = lineup_hash.lstrip(':').split(':')
        # IDs with no known player are kept as-is
        player_names = [names_by_id.get(pid, pid) for pid in player_ids]

        # Without one ID per position the labels cannot be assigned; warn and
        # keep the colon-delimited form (with names substituted).
        if len(player_ids) != len(positions):
            print(f"Warning: Mismatch for hash {lineup_hash}. IDs: {len(player_ids)}, Positions: {len(positions)}")
            return ':' + ':'.join(player_names)

        return "".join(pos + name for pos, name in zip(positions, player_names))

    lineups_json = fetch_json(lineups_url)
    data_json = fetch_json(data_url)

    # NOTE(review): these labels are applied whatever `sport` is passed, and
    # look like an MLB classic roster - confirm before using other sports.
    position_inserts = ['1B ', ' 2B ', ' 3B ', ' C ', ' OF ', ' OF ', ' OF ', ' P ', ' P ', ' SS ']

    player_columns = ['fullName', 'playerId', 'rosterPosition', 'ownership', 'actualPoints']
    player_data = [
        {column: player_info[column] for column in player_columns}
        for player_info in data_json['players'].values()
    ]
    # Explicit columns keep the sort/rename below valid for an empty payload.
    players_df = pd.DataFrame(player_data, columns=player_columns)
    players_df = players_df.sort_values(by='ownership', ascending=False).reset_index(drop=True)
    players_df = players_df.rename(columns={'fullName': 'Player', 'rosterPosition': 'Roster Position', 'ownership': '%Drafted', 'actualPoints': 'FPTS'})
    pid_map = dict(zip(players_df['playerId'].astype(str), players_df['Player']))

    lineup_data = [
        {
            'lineupHash': lineup_hash,
            'points': lineup_info['points'],
            # Only the first entry name attached to a lineup is kept
            'entryNameList': lineup_info['entryNameList'][0],
        }
        for lineup_hash, lineup_info in lineups_json['lineups'].items()
    ]
    lineups_df = pd.DataFrame(lineup_data, columns=['lineupHash', 'points', 'entryNameList'])
    lineups_df = lineups_df.sort_values(by='points', ascending=False).reset_index(drop=True)
    lineups_df['Rank'] = lineups_df.index + 1
    lineups_df['TimeRemaining'] = str(0)

    # Concatenate element-wise. Calling str() on a whole Series embeds the
    # printed Series in every row instead of that row's own value.
    lineups_df['EntryId'] = (
        lineups_df['lineupHash'].astype(str)
        + lineups_df['Rank'].astype(str)
        + lineups_df['entryNameList'].astype(str)
    )

    # Names are looked up per ID rather than regex-replaced across the whole
    # string, so one ID can never match inside a longer ID.
    lineups_df['Lineup'] = (':' + lineups_df['lineupHash'].astype(str)).apply(
        lambda x: format_lineup_string(x, position_inserts, pid_map)
    )
    lineups_df = lineups_df.rename(columns={'points': 'Points', 'entryNameList': 'EntryName'})
    lineups_df = lineups_df[['Rank', 'EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup']]

    # Side-by-side join on row position: row i of the standings sits next to
    # row i of the ownership-sorted player table.
    total_data = lineups_df.merge(players_df, how='left', left_index=True, right_index=True)

    return total_data.to_csv(f'{contest_name}.csv', index=False)