James McCool
committed on
Commit · 0841c51
Parent(s): 007d3db
Add MongoDB integration and contest data retrieval in app.py and grab_contest_data.py
- Implemented MongoDB connection and data retrieval functions in app.py to fetch contest names and related information for MLB (a condensed sketch of the full flow follows the file list below).
- Created a new module, grab_contest_data.py, to handle fetching and formatting contest data from an external API, enhancing data management capabilities.
- Updated the Streamlit interface to allow users to select between manual file uploads and database searches for contest data.
- app.py +34 -3
- global_func/grab_contest_data.py +72 -0
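
Taken together, the new flow is: open the MongoDB connection, list the stored MLB contests, and hand the selected contest's ID and date to the new module, which downloads and reshapes the standings. A condensed sketch of that flow, not the committed code itself, assuming the same 'mongo_uri' secret, 'Contest_Information' database, and 'MLB_contest_info' collection that app.py uses:

import pandas as pd
import streamlit as st
from pymongo.mongo_client import MongoClient
from global_func.grab_contest_data import grab_contest_data

# Connect and pull the stored contest metadata for MLB.
db = MongoClient(st.secrets['mongo_uri'], retryWrites=True)['Contest_Information']
info = pd.DataFrame(list(db['MLB_contest_info'].find())).drop('_id', axis=1)

# Map contest names to the IDs/dates grab_contest_data needs to build its URLs.
id_map = dict(zip(info['Contest Name'], info['Contest ID']))
date_map = dict(zip(info['Contest Name'], info['Date']))

# Fetch and format one contest's standings (written to '<contest name>.csv').
name = info['Contest Name'].iloc[0]
grab_contest_data('MLB', name, id_map, date_map)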
app.py
CHANGED
@@ -5,6 +5,30 @@ import pandas as pd
 import time
 from fuzzywuzzy import process
 from collections import Counter
+from pymongo.mongo_client import MongoClient
+from pymongo.server_api import ServerApi
+
+def init_conn():
+
+    uri = st.secrets['mongo_uri']
+    client = MongoClient(uri, retryWrites=True, serverSelectionTimeoutMS=500000)
+    db = client['Contest_Information']
+
+    return db
+
+def grab_contest_names(db, sport):
+    collection = db[f'{sport}_contest_info']
+    cursor = collection.find()
+
+    curr_info = pd.DataFrame(list(cursor)).drop('_id', axis=1)
+    contest_names = curr_info['Contest Name']
+    contest_id_map = dict(zip(curr_info['Contest Name'], curr_info['Contest ID']))
+    contest_date_map = dict(zip(curr_info['Contest Name'], curr_info['Date']))
+
+    return contest_names, contest_id_map, contest_date_map
+
+db = init_conn()
+contest_names, contest_id_map, contest_date_map = grab_contest_names(db, 'MLB')
 
 ## import global functions
 from global_func.load_contest_file import load_contest_file
@@ -14,6 +38,7 @@ from global_func.create_player_exposures import create_player_exposures
 from global_func.create_stack_exposures import create_stack_exposures
 from global_func.create_stack_size_exposures import create_stack_size_exposures
 from global_func.create_general_exposures import create_general_exposures
+from global_func.grab_contest_data import grab_contest_data
 
 player_exposure_format = {'Exposure Overall': '{:.2%}', 'Exposure Top 1%': '{:.2%}', 'Exposure Top 5%': '{:.2%}', 'Exposure Top 10%': '{:.2%}', 'Exposure Top 20%': '{:.2%}'}
 
@@ -21,10 +46,12 @@ tab1, tab2 = st.tabs(["Data Load", "Contest Analysis"])
 with tab1:
     if st.button('Clear data', key='reset1'):
         st.session_state.clear()
-    col1, col2 = st.columns(2)
+    col1, col2, col3 = st.columns(3)
     with col1:
-
+        parse_type = st.selectbox("Manual upload or DB search?", ['Manual', 'DB Search'])
     with col2:
+        sport_select = st.selectbox("Select Game Type", ['Classic', 'Showdown'])
+    with col3:
         type_var = st.selectbox("Select Game Type", ['Classic', 'Showdown'])
     # Add file uploaders to your app
     col1, col2 = st.columns(2)
@@ -32,7 +59,11 @@ with tab1:
     with col1:
         st.subheader("Contest File")
        st.info("Go ahead and upload a Contest file here. Only include player columns and an optional 'Stack' column if you are playing MLB.")
-
+        if parse_type == 'DB Search':
+            contest_name_var = st.selectbox("Select Contest to load", contest_names)
+            Contest_file = grab_contest_data('MLB', contest_name_var, contest_id_map, contest_date_map)
+        elif parse_type == 'Manual':
+            Contest_file = st.file_uploader("Upload Contest File (CSV or Excel)", type=['csv', 'xlsx', 'xls'])
         if 'Contest' in st.session_state:
             del st.session_state['Contest']
 
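
One side effect of the diff above is that init_conn() is called at module level, so every Streamlit rerun of app.py constructs a new MongoClient. A minimal sketch of the usual caching pattern for this, using Streamlit's st.cache_resource (an illustration only, not part of this commit):

import streamlit as st
from pymongo.mongo_client import MongoClient

@st.cache_resource
def init_conn():
    # Cached across reruns: Streamlit creates the client once per process
    # and returns the same database handle on subsequent script runs.
    client = MongoClient(st.secrets['mongo_uri'], retryWrites=True, serverSelectionTimeoutMS=500000)
    return client['Contest_Information']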
global_func/grab_contest_data.py
ADDED
@@ -0,0 +1,72 @@
+import pandas as pd
+import requests
+
+def grab_contest_data(sport, contest_name, contest_id_map, contest_date_map):
+
+    contest_date = contest_date_map[contest_name]
+    contest_id = contest_id_map[contest_name]
+
+    raw_url = f'https://dh5nxc6yx3kwy.cloudfront.net/contests/{sport.lower()}/{contest_date}/{contest_id}/'
+    data_url = raw_url + 'data/'
+    lineups_url = raw_url + 'lineups/'
+
+    def format_lineup_string(lineup_hash, positions):
+        """Replaces colons in a lineup hash with sequential positions."""
+        # Remove the leading colon and split by the remaining colons
+        player_ids = lineup_hash.lstrip(':').split(':')
+
+        # Check if the number of IDs matches the number of positions
+        if len(player_ids) != len(positions):
+            # Handle potential errors - maybe return the original hash or log a warning
+            print(f"Warning: Mismatch for hash {lineup_hash}. IDs: {len(player_ids)}, Positions: {len(positions)}")
+            return lineup_hash # Or some other error indication
+
+        # Combine positions and player IDs
+        combined_parts = [pos + pid for pos, pid in zip(positions, player_ids)]
+
+        # Join them into a single string
+        return "".join(combined_parts)
+
+    lineups_json = requests.get(lineups_url).json()
+    data_json = requests.get(data_url).json()
+
+    lineup_data = []
+    player_data = []
+    position_inserts = ['1B ', ' 2B ', ' 3B ', ' C ', ' OF ', ' OF ', ' OF ', ' P ', ' P ', ' SS ']
+
+    for players, player_info in data_json['players'].items():
+        player_data.append({
+            'fullName': player_info['fullName'],
+            'playerId': player_info['playerId'],
+            'rosterPosition': player_info['rosterPosition'],
+            'ownership': player_info['ownership'],
+            'actualPoints': player_info['actualPoints']
+        })
+
+    players_df = pd.DataFrame(player_data)
+    players_df = players_df.sort_values(by='ownership', ascending=False).reset_index(drop=True)
+    players_df = players_df.rename(columns={'fullName': 'Player', 'rosterPosition': 'Roster Position', 'ownership': '%Drafted', 'actualPoints': 'FPTS'})
+    pid_map = dict(zip(players_df['playerId'].astype(str), players_df['Player']))
+
+    for lineup_hash, lineup_info in lineups_json['lineups'].items():
+        lineup_data.append({
+            'lineupHash': lineup_hash,
+            'points': lineup_info['points'],
+            'entryNameList': lineup_info['entryNameList'][0]
+        })
+
+    lineups_df = pd.DataFrame(lineup_data)
+    lineups_df = lineups_df.sort_values(by='points', ascending=False)
+    lineups_df = lineups_df.reset_index()
+    lineups_df['index'] = lineups_df.index + 1
+    lineups_df['TimeRemaining'] = str(0)
+    lineups_df['EntryId'] = lineups_df['lineupHash'].astype(str) + str(lineups_df['index']) + str(lineups_df['entryNameList'])
+    lineups_df['lineupHash'] = ':' + lineups_df['lineupHash']
+    lineups_df = lineups_df.rename(columns={'index': 'Rank', 'points': 'Points', 'entryNameList': 'EntryName', 'lineupHash': 'Lineup'})
+    lineups_df['Lineup'] = lineups_df['Lineup'].apply(lambda x: format_lineup_string(x, position_inserts))
+    lineups_df['Lineup'] = lineups_df['Lineup'].replace(pid_map, regex=True)
+    lineups_df = lineups_df[['Rank', 'EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup']]
+
+    total_data = lineups_df.merge(players_df, how='left', left_index=True, right_index=True)
+
+    return total_data.to_csv(f'{contest_name}.csv', index=False)
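
A minimal sketch of calling the new module directly, with hypothetical contest values (the real maps come from grab_contest_names() in app.py). Note that pandas' DataFrame.to_csv(path) writes the file and returns None, so the formatted standings end up in '<contest name>.csv' rather than in the return value:

from global_func.grab_contest_data import grab_contest_data

# Hypothetical example values; real ones are stored in the
# MLB_contest_info collection and looked up by contest name.
contest_id_map = {'Example MLB Contest': '171440000'}
contest_date_map = {'Example MLB Contest': '2025-05-01'}

grab_contest_data('MLB', 'Example MLB Contest', contest_id_map, contest_date_map)
# Inside the module, a lineup hash such as ':123:456:...' is expanded against
# position_inserts to '1B 123 2B 456 ...', then player IDs are replaced with
# player names via pid_map before the CSV is written.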