James McCool
committed on
Commit · 0841c51
Parent(s): 007d3db
Add MongoDB integration and contest data retrieval in app.py and grab_contest_data.py
- Implemented MongoDB connection and data retrieval functions in app.py to fetch contest names and related information for MLB (a condensed sketch of the full flow follows the file list below).
- Created a new module, grab_contest_data.py, to handle fetching and formatting contest data from an external API, enhancing data management capabilities.
- Updated the Streamlit interface to allow users to select between manual file uploads and database searches for contest data.
- app.py +34 -3
- global_func/grab_contest_data.py +72 -0
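
Taken together, the new flow is: open the MongoDB connection, list the stored MLB contests, and hand the selected contest's ID and date to the new module, which downloads and reshapes the standings. A condensed sketch of that flow, not the committed code itself, assuming the same 'mongo_uri' secret, 'Contest_Information' database, and 'MLB_contest_info' collection that app.py uses:

import pandas as pd
import streamlit as st
from pymongo.mongo_client import MongoClient
from global_func.grab_contest_data import grab_contest_data

# Connect and pull the stored contest metadata for MLB.
db = MongoClient(st.secrets['mongo_uri'], retryWrites=True)['Contest_Information']
info = pd.DataFrame(list(db['MLB_contest_info'].find())).drop('_id', axis=1)

# Map contest names to the IDs/dates grab_contest_data needs to build its URLs.
id_map = dict(zip(info['Contest Name'], info['Contest ID']))
date_map = dict(zip(info['Contest Name'], info['Date']))

# Fetch and format one contest's standings (written to '<contest name>.csv').
name = info['Contest Name'].iloc[0]
grab_contest_data('MLB', name, id_map, date_map)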
app.py
CHANGED
@@ -5,6 +5,30 @@ import pandas as pd
 import time
 from fuzzywuzzy import process
 from collections import Counter
+from pymongo.mongo_client import MongoClient
+from pymongo.server_api import ServerApi
+
+def init_conn():
+
+    uri = st.secrets['mongo_uri']
+    client = MongoClient(uri, retryWrites=True, serverSelectionTimeoutMS=500000)
+    db = client['Contest_Information']
+
+    return db
+
+def grab_contest_names(db, sport):
+    collection = db[f'{sport}_contest_info']
+    cursor = collection.find()
+
+    curr_info = pd.DataFrame(list(cursor)).drop('_id', axis=1)
+    contest_names = curr_info['Contest Name']
+    contest_id_map = dict(zip(curr_info['Contest Name'], curr_info['Contest ID']))
+    contest_date_map = dict(zip(curr_info['Contest Name'], curr_info['Date']))
+
+    return contest_names, contest_id_map, contest_date_map
+
+db = init_conn()
+contest_names, contest_id_map, contest_date_map = grab_contest_names(db, 'MLB')
 
 ## import global functions
 from global_func.load_contest_file import load_contest_file
@@ -14,6 +38,7 @@ from global_func.create_player_exposures import create_player_exposures
 from global_func.create_stack_exposures import create_stack_exposures
 from global_func.create_stack_size_exposures import create_stack_size_exposures
 from global_func.create_general_exposures import create_general_exposures
+from global_func.grab_contest_data import grab_contest_data
 
 player_exposure_format = {'Exposure Overall': '{:.2%}', 'Exposure Top 1%': '{:.2%}', 'Exposure Top 5%': '{:.2%}', 'Exposure Top 10%': '{:.2%}', 'Exposure Top 20%': '{:.2%}'}
 
@@ -21,10 +46,12 @@ tab1, tab2 = st.tabs(["Data Load", "Contest Analysis"])
 with tab1:
     if st.button('Clear data', key='reset1'):
         st.session_state.clear()
-    col1, col2 = st.columns(2)
+    col1, col2, col3 = st.columns(3)
     with col1:
-
+        parse_type = st.selectbox("Manual upload or DB search?", ['Manual', 'DB Search'])
     with col2:
+        sport_select = st.selectbox("Select Game Type", ['Classic', 'Showdown'])
+    with col3:
         type_var = st.selectbox("Select Game Type", ['Classic', 'Showdown'])
     # Add file uploaders to your app
     col1, col2 = st.columns(2)
@@ -32,7 +59,11 @@ with tab1:
     with col1:
         st.subheader("Contest File")
        st.info("Go ahead and upload a Contest file here. Only include player columns and an optional 'Stack' column if you are playing MLB.")
-
+        if parse_type == 'DB Search':
+            contest_name_var = st.selectbox("Select Contest to load", contest_names)
+            Contest_file = grab_contest_data('MLB', contest_name_var, contest_id_map, contest_date_map)
+        elif parse_type == 'Manual':
+            Contest_file = st.file_uploader("Upload Contest File (CSV or Excel)", type=['csv', 'xlsx', 'xls'])
         if 'Contest' in st.session_state:
             del st.session_state['Contest']
 
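
One side effect of the diff above is that init_conn() is called at module level, so every Streamlit rerun of app.py constructs a new MongoClient. A minimal sketch of the usual caching pattern for this, using Streamlit's st.cache_resource (an illustration only, not part of this commit):

import streamlit as st
from pymongo.mongo_client import MongoClient

@st.cache_resource
def init_conn():
    # Cached across reruns: Streamlit creates the client once per process
    # and returns the same database handle on subsequent script runs.
    client = MongoClient(st.secrets['mongo_uri'], retryWrites=True, serverSelectionTimeoutMS=500000)
    return client['Contest_Information']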
global_func/grab_contest_data.py
ADDED
@@ -0,0 +1,72 @@
+import pandas as pd
+import requests
+
+def grab_contest_data(sport, contest_name, contest_id_map, contest_date_map):
+
+    contest_date = contest_date_map[contest_name]
+    contest_id = contest_id_map[contest_name]
+
+    raw_url = f'https://dh5nxc6yx3kwy.cloudfront.net/contests/{sport.lower()}/{contest_date}/{contest_id}/'
+    data_url = raw_url + 'data/'
+    lineups_url = raw_url + 'lineups/'
+
+    def format_lineup_string(lineup_hash, positions):
+        """Replaces colons in a lineup hash with sequential positions."""
+        # Remove the leading colon and split by the remaining colons
+        player_ids = lineup_hash.lstrip(':').split(':')
+
+        # Check if the number of IDs matches the number of positions
+        if len(player_ids) != len(positions):
+            # Handle potential errors - maybe return the original hash or log a warning
+            print(f"Warning: Mismatch for hash {lineup_hash}. IDs: {len(player_ids)}, Positions: {len(positions)}")
+            return lineup_hash # Or some other error indication
+
+        # Combine positions and player IDs
+        combined_parts = [pos + pid for pos, pid in zip(positions, player_ids)]
+
+        # Join them into a single string
+        return "".join(combined_parts)
+
+    lineups_json = requests.get(lineups_url).json()
+    data_json = requests.get(data_url).json()
+
+    lineup_data = []
+    player_data = []
+    position_inserts = ['1B ', ' 2B ', ' 3B ', ' C ', ' OF ', ' OF ', ' OF ', ' P ', ' P ', ' SS ']
+
+    for players, player_info in data_json['players'].items():
+        player_data.append({
+            'fullName': player_info['fullName'],
+            'playerId': player_info['playerId'],
+            'rosterPosition': player_info['rosterPosition'],
+            'ownership': player_info['ownership'],
+            'actualPoints': player_info['actualPoints']
+        })
+
+    players_df = pd.DataFrame(player_data)
+    players_df = players_df.sort_values(by='ownership', ascending=False).reset_index(drop=True)
+    players_df = players_df.rename(columns={'fullName': 'Player', 'rosterPosition': 'Roster Position', 'ownership': '%Drafted', 'actualPoints': 'FPTS'})
+    pid_map = dict(zip(players_df['playerId'].astype(str), players_df['Player']))
+
+    for lineup_hash, lineup_info in lineups_json['lineups'].items():
+        lineup_data.append({
+            'lineupHash': lineup_hash,
+            'points': lineup_info['points'],
+            'entryNameList': lineup_info['entryNameList'][0]
+        })
+
+    lineups_df = pd.DataFrame(lineup_data)
+    lineups_df = lineups_df.sort_values(by='points', ascending=False)
+    lineups_df = lineups_df.reset_index()
+    lineups_df['index'] = lineups_df.index + 1
+    lineups_df['TimeRemaining'] = str(0)
+    lineups_df['EntryId'] = lineups_df['lineupHash'].astype(str) + str(lineups_df['index']) + str(lineups_df['entryNameList'])
+    lineups_df['lineupHash'] = ':' + lineups_df['lineupHash']
+    lineups_df = lineups_df.rename(columns={'index': 'Rank', 'points': 'Points', 'entryNameList': 'EntryName', 'lineupHash': 'Lineup'})
+    lineups_df['Lineup'] = lineups_df['Lineup'].apply(lambda x: format_lineup_string(x, position_inserts))
+    lineups_df['Lineup'] = lineups_df['Lineup'].replace(pid_map, regex=True)
+    lineups_df = lineups_df[['Rank', 'EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup']]
+
+    total_data = lineups_df.merge(players_df, how='left', left_index=True, right_index=True)
+
+    return total_data.to_csv(f'{contest_name}.csv', index=False)
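
A minimal sketch of calling the new module directly, with hypothetical contest values (the real maps come from grab_contest_names() in app.py). Note that pandas' DataFrame.to_csv(path) writes the file and returns None, so the formatted standings end up in '<contest name>.csv' rather than in the return value:

from global_func.grab_contest_data import grab_contest_data

# Hypothetical example values; real ones are stored in the
# MLB_contest_info collection and looked up by contest name.
contest_id_map = {'Example MLB Contest': '171440000'}
contest_date_map = {'Example MLB Contest': '2025-05-01'}

grab_contest_data('MLB', 'Example MLB Contest', contest_id_map, contest_date_map)
# Inside the module, a lineup hash such as ':123:456:...' is expanded against
# position_inserts to '1B 123 2B 456 ...', then player IDs are replaced with
# player names via pid_map before the CSV is written.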