James McCool
committed on
Commit
·
ab2c04b
1
Parent(s):
1f1bb40
Refactor Streamlit app to use MongoDB and streamline data loading
Browse files
- Removed Google Sheets integration and replaced it with MongoDB for data retrieval.
- Simplified data loading functions and improved caching mechanisms.
- Enhanced user interface with new tabs for pitchers, hitters, and teams, allowing for better data organization and display.
- Updated styling for improved user experience.
- src/streamlit_app.py +149 -363
src/streamlit_app.py
CHANGED
@@ -1,371 +1,157 @@
|
|
1 |
-
import
|
2 |
import numpy as np
|
3 |
import pandas as pd
|
4 |
-
import
|
5 |
-
import gspread
|
6 |
-
from itertools import combinations
|
7 |
-
|
8 |
-
scope = ['https://www.googleapis.com/auth/spreadsheets',
|
9 |
-
"https://www.googleapis.com/auth/drive"]
|
10 |
-
|
11 |
-
credentials = {
|
12 |
-
"type": "service_account",
|
13 |
-
"project_id": "model-sheets-connect",
|
14 |
-
"private_key_id": "0e0bc2fdef04e771172fe5807392b9d6639d945e",
|
15 |
-
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDiu1v/e6KBKOcK\ncx0KQ23nZK3ZVvADYy8u/RUn/EDI82QKxTd/DizRLIV81JiNQxDJXSzgkbwKYEDm\n48E8zGvupU8+Nk76xNPakrQKy2Y8+VJlq5psBtGchJTuUSHcXU5Mg2JhQsB376PJ\nsCw552K6Pw8fpeMDJDZuxpKSkaJR6k9G5Dhf5q8HDXnC5Rh/PRFuKJ2GGRpX7n+2\nhT/sCax0J8jfdTy/MDGiDfJqfQrOPrMKELtsGHR9Iv6F4vKiDqXpKfqH+02E9ptz\nBk+MNcbZ3m90M8ShfRu28ebebsASfarNMzc3dk7tb3utHOGXKCf4tF8yYKo7x8BZ\noO9X4gSfAgMBAAECggEAU8ByyMpSKlTCF32TJhXnVJi/kS+IhC/Qn5JUDMuk4LXr\naAEWsWO6kV/ZRVXArjmuSzuUVrXumISapM9Ps5Ytbl95CJmGDiLDwRL815nvv6k3\nUyAS8EGKjz74RpoIoH6E7EWCAzxlnUgTn+5oP9Flije97epYk3H+e2f1f5e1Nn1d\nYNe8U+1HqJgILcxA1TAUsARBfoD7+K3z/8DVPHI8IpzAh6kTHqhqC23Rram4XoQ6\nzj/ZdVBjvnKuazETfsD+Vl3jGLQA8cKQVV70xdz3xwLcNeHsbPbpGBpZUoF73c65\nkAXOrjYl0JD5yAk+hmYhXr6H9c6z5AieuZGDrhmlFQKBgQDzV6LRXmjn4854DP/J\nI82oX2GcI4eioDZPRukhiQLzYerMQBmyqZIRC+/LTCAhYQSjNgMa+ZKyvLqv48M0\n/x398op/+n3xTs+8L49SPI48/iV+mnH7k0WI/ycd4OOKh8rrmhl/0EWb9iitwJYe\nMjTV/QxNEpPBEXfR1/mvrN/lVQKBgQDuhomOxUhWVRVH6x03slmyRBn0Oiw4MW+r\nrt1hlNgtVmTc5Mu+4G0USMZwYuOB7F8xG4Foc7rIlwS7Ic83jMJxemtqAelwOLdV\nXRLrLWJfX8+O1z/UE15l2q3SUEnQ4esPHbQnZowHLm0mdL14qSVMl1mu1XfsoZ3z\nJZTQb48CIwKBgEWbzQRtKD8lKDupJEYqSrseRbK/ax43DDITS77/DWwHl33D3FYC\nMblUm8ygwxQpR4VUfwDpYXBlklWcJovzamXpSnsfcYVkkQH47NuOXPXPkXQsw+w+\nDYcJzeu7F/vZqk9I7oBkWHUrrik9zPNoUzrfPvSRGtkAoTDSwibhoc5dAoGBAMHE\nK0T/ANeZQLNuzQps6S7G4eqjwz5W8qeeYxsdZkvWThOgDd/ewt3ijMnJm5X05hOn\ni4XF1euTuvUl7wbqYx76Wv3/1ZojiNNgy7ie4rYlyB/6vlBS97F4ZxJdxMlabbCW\n6b3EMWa4EVVXKoA1sCY7IVDE+yoQ1JYsZmq45YzPAoGBANWWHuVueFGZRDZlkNlK\nh5OmySmA0NdNug3G1upaTthyaTZ+CxGliwBqMHAwpkIRPwxUJpUwBTSEGztGTAxs\nWsUOVWlD2/1JaKSmHE8JbNg6sxLilcG6WEDzxjC5dLL1OrGOXj9WhC9KX3sq6qb6\nF/j9eUXfXjAlb042MphoF3ZC\n-----END PRIVATE KEY-----\n",
|
16 |
-
"client_email": "[email protected]",
|
17 |
-
"client_id": "100369174533302798535",
|
18 |
-
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
19 |
-
"token_uri": "https://oauth2.googleapis.com/token",
|
20 |
-
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
21 |
-
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/gspread-connection%40model-sheets-connect.iam.gserviceaccount.com"
|
22 |
-
}
|
23 |
-
|
24 |
-
gc = gspread.service_account_from_dict(credentials)
|
25 |
|
26 |
st.set_page_config(layout="wide")
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
BP_league_format = ['Strikeoutper', 'Walkper','xBA', 'xSLG', 'BABIP', 'xwOBA', 'AVG', 'HWS Ratio']
|
34 |
-
hitter_format = {'K%': '{:.2%}', 'xHR/PA': '{:.2%}', 'Event/PA': '{:.2%}'}
|
35 |
-
offense_format = {'8+ For': '{:.2%}', '8+ For L5': '{:.2%}', '8+ For L10': '{:.2%}', 'Trending 8+ For': '{:.2%}'}
|
36 |
-
defense_format = {'8+ Allowed': '{:.2%}', '8+ Allowed L5': '{:.2%}', '8+ Allowed L10': '{:.2%}', 'Trending 8+ Allowed': '{:.2%}'}
|
37 |
-
R2_format = {'R2_to_Opp_szn': '{:.2%}', 'R2_to_Opp_sample': '{:.2%}', 'R2_to_Opp L5': '{:.2%}', 'R2_to_Opp L10': '{:.2%}', 'R2_to_Opp_Trend': '{:.2%}'}
|
38 |
-
|
39 |
-
data_hold = 'https://docs.google.com/spreadsheets/d/1f42Ergav8K1VsOLOK9MUn7DM_MLMvv4GR2Fy7EfnZTc/edit#gid=500994479'
|
40 |
-
|
41 |
-
sh = gc.open_by_url(data_hold)
|
42 |
-
|
43 |
-
@st.cache_resource(ttl = 300)
|
44 |
-
def load_time():
|
45 |
-
worksheet = sh.worksheet('Timestamp')
|
46 |
-
raw_stamp = worksheet.acell('a1').value
|
47 |
-
|
48 |
-
t_stamp = f"Last update was at {raw_stamp}"
|
49 |
-
|
50 |
-
return t_stamp
|
51 |
-
|
52 |
-
@st.cache_resource(ttl = 299)
|
53 |
-
def load_table(URL, specific_tab):
|
54 |
-
worksheet = sh.worksheet(specific_tab)
|
55 |
-
load_display = pd.DataFrame(worksheet.get_all_records())
|
56 |
-
|
57 |
-
return load_display
|
58 |
|
59 |
-
|
60 |
-
def True_AVG_Splits_load():
|
61 |
-
|
62 |
-
sh = gc.open_by_url(data_hold)
|
63 |
-
worksheet = sh.worksheet('True_AVG_Split')
|
64 |
-
pitcher_stats = pd.DataFrame(worksheet.get_all_records())
|
65 |
-
pitcher_stats.apply(pd.to_numeric, errors='ignore')
|
66 |
-
pitcher_stats = pitcher_stats.drop(columns=['HWSr (LHH)', 'HWSr (RHH)', 'HWSr (Overall)', 'Weighted HWSr',])
|
67 |
-
pitcher_stats = pitcher_stats.dropna()
|
68 |
-
pitcher_stats = pitcher_stats.sort_values(by='Weighted True AVG', ascending=True)
|
69 |
-
|
70 |
-
return pitcher_stats
|
71 |
-
|
72 |
-
@st.cache_resource(ttl = 299)
|
73 |
-
def HWSr_Splits_load():
|
74 |
-
|
75 |
-
sh = gc.open_by_url(data_hold)
|
76 |
-
worksheet = sh.worksheet('True_AVG_Split')
|
77 |
-
pitcher_stats = pd.DataFrame(worksheet.get_all_records())
|
78 |
-
pitcher_stats.apply(pd.to_numeric, errors='ignore')
|
79 |
-
pitcher_stats = pitcher_stats.drop(columns=['True AVG (LHH)', 'True AVG (RHH)', 'True AVG (Overall)', 'Weighted True AVG',])
|
80 |
-
pitcher_stats = pitcher_stats.dropna()
|
81 |
-
pitcher_stats = pitcher_stats.sort_values(by='Weighted HWSr', ascending=True)
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
worksheet = sh.worksheet('Pitcher_xData_RHH')
|
147 |
-
pitcher_stats = pd.DataFrame(worksheet.get_all_records())
|
148 |
-
pitcher_stats.apply(pd.to_numeric, errors='ignore')
|
149 |
-
pitcher_stats = pitcher_stats.dropna()
|
150 |
-
pitcher_stats = pitcher_stats[['Player', 'PA', 'Hits', 'Singles', 'Doubles', 'Homeruns', 'Strikeoutper', 'Strikeouts', 'Walkper', 'Walks', 'xSLG', 'xwOBA', 'BABIP', 'AVG', 'xBA', 'True_AVG', 'xHRs']]
|
151 |
-
pitcher_stats = pitcher_stats.sort_values(by='PA', ascending=False)
|
152 |
-
pitcher_stats = pitcher_stats.drop_duplicates(subset='Player')
|
153 |
-
pitcher_stats = pitcher_stats.set_index('Player')
|
154 |
-
|
155 |
-
return pitcher_stats
|
156 |
-
|
157 |
-
@st.cache_resource(ttl = 299)
|
158 |
-
def Full_LHH_load():
|
159 |
-
|
160 |
-
sh = gc.open_by_url(data_hold)
|
161 |
-
worksheet = sh.worksheet('Pitcher_xData_LHH')
|
162 |
-
pitcher_stats = pd.DataFrame(worksheet.get_all_records())
|
163 |
-
pitcher_stats.apply(pd.to_numeric, errors='ignore')
|
164 |
-
pitcher_stats = pitcher_stats.dropna()
|
165 |
-
pitcher_stats = pitcher_stats[['Player', 'PA', 'Hits', 'Singles', 'Doubles', 'Homeruns', 'Strikeoutper', 'Strikeouts', 'Walkper', 'Walks', 'xSLG', 'xwOBA', 'BABIP', 'AVG', 'xBA', 'True_AVG', 'xHRs']]
|
166 |
-
pitcher_stats = pitcher_stats.sort_values(by='PA', ascending=False)
|
167 |
-
pitcher_stats = pitcher_stats.drop_duplicates(subset='Player')
|
168 |
-
pitcher_stats = pitcher_stats.set_index('Player')
|
169 |
-
|
170 |
-
return pitcher_stats
|
171 |
-
|
172 |
-
@st.cache_resource(ttl = 299)
|
173 |
-
def Bullpen_Data_load():
|
174 |
-
|
175 |
-
sh = gc.open_by_url(data_hold)
|
176 |
-
worksheet = sh.worksheet('Bullpen_xData')
|
177 |
-
pitcher_stats = pd.DataFrame(worksheet.get_all_records())
|
178 |
-
pitcher_stats.apply(pd.to_numeric, errors='ignore')
|
179 |
-
pitcher_stats = pitcher_stats.dropna()
|
180 |
-
for checkVar in range(len(wrong_acro)):
|
181 |
-
pitcher_stats['Names'] = pitcher_stats['Names'].replace(wrong_acro, right_acro)
|
182 |
-
pitcher_stats = pitcher_stats.sort_values(by='xSLG', ascending=False)
|
183 |
-
|
184 |
-
return pitcher_stats
|
185 |
-
|
186 |
-
@st.cache_data
|
187 |
-
def convert_df_to_csv(df):
|
188 |
-
return df.to_csv().encode('utf-8')
|
189 |
-
|
190 |
-
t_stamp = load_time()
|
191 |
-
|
192 |
-
raw_baselines = load_table(data_hold, 'Starting_Pitchers')
|
193 |
-
|
194 |
-
pitcher_stats = load_table(data_hold, 'Starting_Pitchers')
|
195 |
-
|
196 |
-
hitter_stats = load_table(data_hold, 'DK_Slate_hitters')
|
197 |
-
hitter_stats.replace('', np.nan, inplace=True)
|
198 |
-
hitter_stats.apply(pd.to_numeric, errors='ignore')
|
199 |
-
hitter_stats = hitter_stats.dropna(subset=['Order'])
|
200 |
-
hitter_stats = hitter_stats.dropna(subset=['Opp_SP'])
|
201 |
-
|
202 |
-
macro_tables = load_table(data_hold, 'Macro_Trending')
|
203 |
-
|
204 |
-
col1, col2 = st.columns([1, 5])
|
205 |
-
|
206 |
-
with col1:
|
207 |
-
st.info(t_stamp)
|
208 |
-
if st.button("Load/Reset Data", key='reset1'):
|
209 |
-
st.cache_data.clear()
|
210 |
-
t_stamp = load_time()
|
211 |
-
|
212 |
-
pitcher_stats = load_table(data_hold, 'Starting_Pitchers')
|
213 |
-
hitter_stats = load_table(data_hold, 'DK_Slate_hitters')
|
214 |
-
hitter_stats.replace('', np.nan, inplace=True)
|
215 |
-
hitter_stats = hitter_stats.dropna(subset=['Order'])
|
216 |
-
stat_type_var1 = st.radio("Are you looking at pitchers or hitters?", ('Pitchers', 'Hitters'), key='stat_type_var1')
|
217 |
-
if stat_type_var1 == 'Pitchers':
|
218 |
-
stat_var1 = st.radio("What sheets would you like to view?", ('True AVG Splits', 'HWSr Splits', 'Current Slate Stats', 'Stats vs. RHH', 'Stats vs. LHH', 'Full League Stats', 'Full League Stats vs. RHH', 'Full League Stats vs. LHH', 'Bullpen Data'), key='stat_var1')
|
219 |
-
sp_split1 = st.radio("Are you running the full slate or certain games?", ('Full Slate Run', 'Specific Games'), key='sp_split1')
|
220 |
-
if sp_split1 == 'Specific Games':
|
221 |
-
sp_var1 = st.multiselect('Which teams would you like to include in the Table?', options = pitcher_stats['Team'].unique(), key='sp_var1')
|
222 |
-
elif sp_split1 == 'Full Slate Run':
|
223 |
-
sp_var1 = pitcher_stats.Team.values.tolist()
|
224 |
-
elif stat_type_var1 == 'Hitters':
|
225 |
-
site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'), key='site_var1')
|
226 |
-
if site_var1 == "Draftkings":
|
227 |
-
hitter_stats = load_table(data_hold, 'DK_Slate_hitters')
|
228 |
-
hitter_stats.replace('', np.nan, inplace=True)
|
229 |
-
hitter_stats = hitter_stats.dropna(subset=['Order'])
|
230 |
-
elif site_var1 == "Fanduel":
|
231 |
-
hitter_stats = load_table(data_hold, 'FD_Slate_Hitters')
|
232 |
-
hitter_stats.replace('', np.nan, inplace=True)
|
233 |
-
hitter_stats = hitter_stats.dropna(subset=['Order'])
|
234 |
-
stat_var1 = st.radio("What sheets would you like to view?", options = ['Current Slate Player Stats', 'Current Slate Team Stats'], key='stat_var1')
|
235 |
-
split_var1 = st.radio("Are you running the full slate or certain games?", ('Full Slate Run', 'Specific Games'), key='split_var1')
|
236 |
-
pos_split1 = st.radio("Are you viewing all positions or specific positions?", ('All Positions', 'Specific Positions'), key='pos_split1')
|
237 |
-
if pos_split1 == 'Specific Positions':
|
238 |
-
pos_var1 = st.multiselect('What Positions would you like to view?', options = ['C', '1B', '2B', '3B', 'SS', 'OF'])
|
239 |
-
elif pos_split1 == 'All Positions':
|
240 |
-
pos_var1 = 'All'
|
241 |
-
if split_var1 == 'Specific Games':
|
242 |
-
team_var1 = st.multiselect('Which teams would you like to include in the Table?', options = hitter_stats['Team'].unique(), key='team_var1')
|
243 |
-
elif split_var1 == 'Full Slate Run':
|
244 |
-
team_var1 = hitter_stats.Team.values.tolist()
|
245 |
-
|
246 |
-
with col2:
|
247 |
-
if stat_type_var1 == 'Pitchers':
|
248 |
-
if stat_var1 == 'True AVG Splits':
|
249 |
-
pitcher_stats = True_AVG_Splits_load()
|
250 |
-
pitcher_stats = pitcher_stats[pitcher_stats['Team'].isin(sp_var1)]
|
251 |
-
#pitcher_stats = pitcher_stats.set_index('Player')
|
252 |
-
st.dataframe(pitcher_stats.style.background_gradient(axis=0).background_gradient(cmap = 'RdYlGn_r').format(precision=3), use_container_width = True)
|
253 |
-
if stat_var1 == 'HWSr Splits':
|
254 |
-
pitcher_stats = HWSr_Splits_load()
|
255 |
-
pitcher_stats = pitcher_stats[pitcher_stats['Team'].isin(sp_var1)]
|
256 |
-
#pitcher_stats = pitcher_stats.set_index('Player')
|
257 |
-
st.dataframe(pitcher_stats.style.background_gradient(axis=0).background_gradient(cmap = 'RdYlGn_r').format(precision=3), use_container_width = True)
|
258 |
-
elif stat_var1 == 'Current Slate Stats':
|
259 |
-
pitcher_stats = SP_Slate_Stats_load()
|
260 |
-
pitcher_stats = pitcher_stats[pitcher_stats['Team'].isin(sp_var1)]
|
261 |
-
#pitcher_stats = pitcher_stats.set_index('Player')
|
262 |
-
st.dataframe(pitcher_stats.style.background_gradient(axis=0).background_gradient(cmap = 'RdYlGn_r').background_gradient(cmap='RdYlGn', subset='K%').format(SP_format, precision=2), use_container_width = True)
|
263 |
-
elif stat_var1 == 'Stats vs. RHH':
|
264 |
-
pitcher_stats = RHH_load()
|
265 |
-
pitcher_stats = pitcher_stats[pitcher_stats['Team'].isin(sp_var1)]
|
266 |
-
#pitcher_stats = pitcher_stats.set_index('Names')
|
267 |
-
st.dataframe(pitcher_stats.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn_r', subset=['Opp RHH', 'Salary', 'BB%', 'True AVG', 'xSLG', 'xBA', 'Hits', 'Homeruns', 'xHRs', 'xHR/PA']).background_gradient(cmap='RdYlGn', subset='K%').format(SP_format, precision=2), use_container_width = True)
|
268 |
-
elif stat_var1 == 'Stats vs. LHH':
|
269 |
-
pitcher_stats = LHH_load()
|
270 |
-
pitcher_stats = pitcher_stats[pitcher_stats['Team'].isin(sp_var1)]
|
271 |
-
#pitcher_stats = pitcher_stats.set_index('Names')
|
272 |
-
st.dataframe(pitcher_stats.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn_r', subset=['Opp LHH', 'Salary', 'BB%', 'True AVG', 'xSLG', 'xBA', 'Hits', 'Homeruns', 'xHRs', 'xHR/PA']).background_gradient(cmap='RdYlGn', subset='K%').format(SP_format, precision=2), use_container_width = True)
|
273 |
-
elif stat_var1 == 'Full League Stats':
|
274 |
-
pitcher_stats = Full_Stats_load()
|
275 |
-
st.dataframe(pitcher_stats.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn_r').background_gradient(cmap='RdYlGn', subset=['Strikeoutper', 'Strikeouts', 'PA']).format(precision=0).format(precision=3, subset = SP_league_format), use_container_width = True)
|
276 |
-
elif stat_var1 == 'Full League Stats vs. RHH':
|
277 |
-
pitcher_stats = Full_RHH_load()
|
278 |
-
st.dataframe(pitcher_stats.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn_r').background_gradient(cmap='RdYlGn', subset=['Strikeoutper', 'Strikeouts', 'PA']).format(precision=0).format(precision=3, subset = SP_league_format), use_container_width = True)
|
279 |
-
elif stat_var1 == 'Full League Stats vs. LHH':
|
280 |
-
pitcher_stats = Full_LHH_load()
|
281 |
-
st.dataframe(pitcher_stats.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn_r').background_gradient(cmap='RdYlGn', subset=['Strikeoutper', 'Strikeouts', 'PA']).format(precision=0).format(precision=3, subset = SP_league_format), use_container_width = True)
|
282 |
-
elif stat_var1 == 'Bullpen Data':
|
283 |
-
pitcher_stats = Bullpen_Data_load()
|
284 |
-
pitcher_stats = pitcher_stats[pitcher_stats['Names'].isin(sp_var1)]
|
285 |
-
#pitcher_stats = pitcher_stats.set_index('Names')
|
286 |
-
st.dataframe(pitcher_stats.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn_r').background_gradient(cmap='RdYlGn', subset=['Strikeoutper', 'Strikeouts', 'PA']).format(precision=0).format(precision=3, subset = BP_league_format), use_container_width = True)
|
287 |
-
elif stat_type_var1 == 'Hitters':
|
288 |
-
if stat_var1 == 'Current Slate Player Stats':
|
289 |
-
if site_var1 == 'Draftkings':
|
290 |
-
hitter_stats = load_table(data_hold, 'DK_Slate_hitters')
|
291 |
-
if pos_var1 != 'All':
|
292 |
-
hitter_stats = hitter_stats[hitter_stats['Position'].str.contains('|'.join(pos_var1))]
|
293 |
-
elif site_var1 == 'Fanduel':
|
294 |
-
hitter_stats = load_table(data_hold, 'FD_Slate_Hitters')
|
295 |
-
if pos_var1 != 'All':
|
296 |
-
hitter_stats = hitter_stats[hitter_stats['Position'].str.contains('|'.join(pos_var1))]
|
297 |
-
hitter_stats.apply(pd.to_numeric, errors='ignore')
|
298 |
-
hitter_stats.replace('', np.nan, inplace=True)
|
299 |
-
hitter_stats = hitter_stats.dropna(subset=['Order'])
|
300 |
-
hitter_stats = hitter_stats.dropna(subset=['Opp_SP'])
|
301 |
-
hitter_stats = hitter_stats.drop(columns=['IBB'])
|
302 |
-
hitter_stats = hitter_stats.sort_values(by='Event/PA', ascending=False)
|
303 |
-
hitter_stats = hitter_stats.set_index('Player')
|
304 |
-
hitter_stats = hitter_stats[hitter_stats['Team'].isin(team_var1)]
|
305 |
-
st.dataframe(hitter_stats.style.background_gradient(axis=0).background_gradient(cmap = 'RdYlGn').background_gradient(cmap='RdYlGn_r', subset=['K%', 'Order', 'Salary']).format(hitter_format, precision=0).format(precision=3, subset = ['xBA', 'xSLG']), use_container_width = True)
|
306 |
-
elif stat_var1 == 'Current Slate Team Stats':
|
307 |
-
if site_var1 == 'Draftkings':
|
308 |
-
hitter_stats = load_table(data_hold, 'DK_Slate_Teams')
|
309 |
-
elif site_var1 == 'Fanduel':
|
310 |
-
hitter_stats = load_table(data_hold, 'FD_Slate_Teams')
|
311 |
-
hitter_stats.apply(pd.to_numeric, errors='ignore')
|
312 |
-
hitter_stats['Acro'] = hitter_stats['Team']
|
313 |
-
hitter_stats.replace('', np.nan, inplace=True)
|
314 |
-
hitter_stats = hitter_stats.dropna(subset=['Opp_SP'])
|
315 |
-
hitter_stats = hitter_stats.sort_values(by='Event/PA', ascending=False)
|
316 |
-
hitter_stats = hitter_stats.set_index('Team')
|
317 |
-
hitter_stats = hitter_stats[hitter_stats['Acro'].isin(team_var1)]
|
318 |
-
st.dataframe(hitter_stats.style.background_gradient(axis=0).background_gradient(cmap = 'RdYlGn').background_gradient(cmap='RdYlGn_r', subset=['K%', 'Avg Salary']).format(hitter_format, precision=0).format(precision=3, subset = ['xBA', 'xSLG', 'Opp True AVG']), use_container_width = True)
|
319 |
-
elif stat_var1 == 'Team Trending Stats (Offense)':
|
320 |
-
hitter_stats = load_table(data_hold, 'Macro_Trending')
|
321 |
-
hitter_stats.apply(pd.to_numeric, errors='ignore')
|
322 |
-
for checkVar in range(len(wrong_acro)):
|
323 |
-
hitter_stats['Team'] = hitter_stats['Team'].replace(wrong_acro, right_acro)
|
324 |
-
hitter_stats['Acro'] = hitter_stats['Team']
|
325 |
-
hitter_stats.replace('', np.nan, inplace=True)
|
326 |
-
hitter_stats = hitter_stats.dropna(subset=['Opp'])
|
327 |
-
hitter_stats = hitter_stats[['Team', 'Opp', 'Avg Score', 'Avg Score L5', 'Avg Score L10', 'Trending Score', '8+ For', '8+ For L5', '8+ For L10', 'Trending 8+ For', 'Acro']]
|
328 |
-
hitter_stats = hitter_stats.sort_values(by='Trending Score', ascending=False)
|
329 |
-
hitter_stats = hitter_stats.set_index('Team')
|
330 |
-
hitter_stats = hitter_stats[hitter_stats['Acro'].isin(team_var1)]
|
331 |
-
st.dataframe(hitter_stats.style.background_gradient(axis=0).background_gradient(cmap = 'RdYlGn').format(offense_format, precision=2), height=1200, use_container_width = True)
|
332 |
-
elif stat_var1 == 'Team Trending Stats (Defense)':
|
333 |
-
hitter_stats = load_table(data_hold, 'Macro_Trending')
|
334 |
-
hitter_stats.apply(pd.to_numeric, errors='ignore')
|
335 |
-
for checkVar in range(len(wrong_acro)):
|
336 |
-
hitter_stats['Team'] = hitter_stats['Team'].replace(wrong_acro, right_acro)
|
337 |
-
hitter_stats['Acro'] = hitter_stats['Team']
|
338 |
-
hitter_stats.replace('', np.nan, inplace=True)
|
339 |
-
hitter_stats = hitter_stats.dropna(subset=['Opp'])
|
340 |
-
hitter_stats = hitter_stats[['Team', 'Opp', 'Avg Allowed', 'Avg Allowed L5', 'Avg Allowed L10', 'Trending Avg Allowed', '8+ Allowed', '8+ Allowed L5', '8+ Allowed L10', 'Trending 8+ Allowed', 'Acro']]
|
341 |
-
hitter_stats = hitter_stats.sort_values(by='Trending Avg Allowed', ascending=False)
|
342 |
-
hitter_stats = hitter_stats.set_index('Team')
|
343 |
-
hitter_stats = hitter_stats[hitter_stats['Acro'].isin(team_var1)]
|
344 |
-
st.dataframe(hitter_stats.style.background_gradient(axis=0).background_gradient(cmap = 'RdYlGn').format(defense_format, precision=2), height=1200, use_container_width = True)
|
345 |
-
elif stat_var1 == 'Team Trending Stats (Matchup ELO)':
|
346 |
-
hitter_stats = load_table(data_hold, 'Macro_Trending')
|
347 |
-
hitter_stats.apply(pd.to_numeric, errors='ignore')
|
348 |
-
for checkVar in range(len(wrong_acro)):
|
349 |
-
hitter_stats['Team'] = hitter_stats['Team'].replace(wrong_acro, right_acro)
|
350 |
-
hitter_stats['Acro'] = hitter_stats['Team']
|
351 |
-
hitter_stats.replace('', np.nan, inplace=True)
|
352 |
-
hitter_stats = hitter_stats.dropna(subset=['Opp'])
|
353 |
-
hitter_stats = hitter_stats[['Team', 'Opp', 'Avg Score', 'Avg Score L5', 'Avg Score L10', 'Trending Score', 'R2_to_Opp_szn', 'R2_to_Opp_sample', 'R2_to_Opp L5', 'R2_to_Opp L10', 'R2_to_Opp_Trend', 'Acro']]
|
354 |
-
hitter_stats = hitter_stats.sort_values(by='R2_to_Opp_Trend', ascending=False)
|
355 |
-
hitter_stats = hitter_stats.set_index('Team')
|
356 |
-
hitter_stats = hitter_stats[hitter_stats['Acro'].isin(team_var1)]
|
357 |
-
st.dataframe(hitter_stats.style.background_gradient(axis=0).background_gradient(cmap = 'RdYlGn').format(R2_format, precision=2), height=1200, use_container_width = True)
|
358 |
-
if stat_type_var1 == 'Pitchers':
|
359 |
-
st.download_button(
|
360 |
-
label="Export Tables",
|
361 |
-
data=convert_df_to_csv(pitcher_stats),
|
362 |
-
file_name='MLB_Research_export.csv',
|
363 |
-
mime='text/csv',
|
364 |
-
)
|
365 |
-
elif stat_type_var1 == 'Hitters':
|
366 |
-
st.download_button(
|
367 |
-
label="Export Tables",
|
368 |
-
data=convert_df_to_csv(hitter_stats),
|
369 |
-
file_name='MLB_Research_export.csv',
|
370 |
-
mime='text/csv',
|
371 |
-
)
|
|
|
1 |
+
import streamlit as st
|
2 |
import numpy as np
|
3 |
import pandas as pd
|
4 |
+
import pymongo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
st.set_page_config(layout="wide")
|
7 |
|
8 |
+
@st.cache_resource
def init_conn():
    """Connect to MongoDB and return the ``MLB_Database`` database handle.

    The connection string is read from the Streamlit secret ``mongo_uri``.
    The function is decorated with ``st.cache_resource``, so the handle is
    meant to be created once and reused rather than rebuilt on every call.

    Returns:
        The ``MLB_Database`` database object of the newly created client.
    """
    mongo_client = pymongo.MongoClient(
        st.secrets['mongo_uri'],
        retryWrites=True,
        serverSelectionTimeoutMS=500000,
    )
    return mongo_client["MLB_Database"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
db = init_conn()
|
17 |
+
|
18 |
+
st.markdown("""
|
19 |
+
<style>
|
20 |
+
/* Tab styling */
|
21 |
+
.stTabs [data-baseweb="tab-list"] {
|
22 |
+
gap: 8px;
|
23 |
+
padding: 4px;
|
24 |
+
}
|
25 |
+
.stTabs [data-baseweb="tab"] {
|
26 |
+
height: 50px;
|
27 |
+
white-space: pre-wrap;
|
28 |
+
background-color: #DAA520;
|
29 |
+
color: white;
|
30 |
+
border-radius: 10px;
|
31 |
+
gap: 1px;
|
32 |
+
padding: 10px 20px;
|
33 |
+
font-weight: bold;
|
34 |
+
transition: all 0.3s ease;
|
35 |
+
}
|
36 |
+
.stTabs [aria-selected="true"] {
|
37 |
+
background-color: #DAA520;
|
38 |
+
border: 3px solid #FFD700;
|
39 |
+
color: white;
|
40 |
+
}
|
41 |
+
.stTabs [data-baseweb="tab"]:hover {
|
42 |
+
background-color: #FFD700;
|
43 |
+
cursor: pointer;
|
44 |
+
}
|
45 |
+
div[data-baseweb="select"] > div {
|
46 |
+
background-color: #DAA520;
|
47 |
+
color: white;
|
48 |
+
}
|
49 |
+
div{
|
50 |
+
box-sizing: content-box !important;
|
51 |
+
}
|
52 |
+
</style>""", unsafe_allow_html=True)
|
53 |
+
|
54 |
+
@st.cache_resource(ttl = 61)
def init_baselines():
    """Load every baseline collection from MongoDB into a DataFrame.

    Each collection listed in ``db_pulls`` is read in full with ``find()``
    and wrapped in a ``pandas.DataFrame``. The result is cached by
    ``st.cache_resource`` with a 61 second TTL.

    Returns:
        A 13-tuple of DataFrames, in this order: bullpen data, hitter
        aggregate, hitter long term, hitter short term, pitcher aggregate,
        pitcher long term, pitcher short term, slate hitters, slate teams,
        starting pitchers, True AVG split, pitcher info, hitter info.
    """
    # Order matters: callers unpack the result positionally, so the
    # sequence of names here defines the order of the returned tuple.
    db_pulls = (
        'Bullpen_Data',
        'Hitter_Agg_Merge',
        'Hitter_Long_Merge',
        'Hitter_Short_Merge',
        'Pitcher_Agg_Merge',
        'Pitcher_Long_Merge',
        'Pitcher_Short_Merge',
        'Slate_Hitters_Merge',
        'Slate_Team_Merge',
        'Starting_Pitchers',
        'True_AVG_Split',
        'Pitcher_Info',
        'Hitter_Info',
    )

    # One full-collection read per name; replaces the former if/elif ladder
    # that assigned each frame to its own local before returning them all.
    return tuple(pd.DataFrame(db[table].find()) for table in db_pulls)
|
93 |
+
|
94 |
+
bp_data, hitter_agg, hitter_long, hitter_short, pitcher_agg, pitcher_long, pitcher_short, slate_hitters, slate_team, starting_pitchers, true_avg_split, pitcher_info, hitter_info = init_baselines()
|
95 |
+
|
96 |
+
pitcher_tab, hitter_tab, team_tab = st.tabs(['Pitchers', 'Hitters', 'Team'])
|
97 |
+
|
98 |
+
with pitcher_tab:
    # Display controls are grouped inside a collapsible expander.
    with st.expander('Info and Display Options'):
        st.info('Note: Splits options are available for all baseline tables, they do not apply to True AVG, HWSr, or the Overview tables')
        col1, col2, col3 = st.columns(3)
        with col1:
            site_var_sp = st.selectbox('Site', ['DraftKings', 'FanDuel'], key = 'site_var_sp')
        with col2:
            table_var_sp = st.selectbox('Table', ['True AVG Splits', 'HWSr Splits', 'Current Slate Overview', 'Active Baselines', 'League Aggregate Baselines', 'League Short Term Baselines', 'League Long Term Baselines'], key = 'table_var_sp')
        with col3:
            splits_var_sp = st.selectbox('Splits', ['Overall', 'RHH', 'LHH'], key = 'splits_var_sp')

    # Table label -> frame to render. 'HWSr Splits' maps to the same frame
    # as 'True AVG Splits', exactly as the original branch chain did.
    pitcher_tables = {
        'True AVG Splits': true_avg_split,
        'HWSr Splits': true_avg_split,
        'Current Slate Overview': starting_pitchers,
        'Active Baselines': pitcher_info,
        'League Aggregate Baselines': pitcher_agg,
        'League Short Term Baselines': pitcher_short,
        'League Long Term Baselines': pitcher_long,
    }
    selected_pitcher_table = pitcher_tables.get(table_var_sp)
    if selected_pitcher_table is not None:
        st.dataframe(selected_pitcher_table)
|
123 |
+
|
124 |
+
with hitter_tab:
    # Display controls are grouped inside a collapsible expander.
    with st.expander('Info and Display Options'):
        st.info('Note: Splits options are available for all baseline tables')
        col1, col2, col3 = st.columns(3)
        with col1:
            site_var_hitter = st.selectbox('Site', ['DraftKings', 'FanDuel'], key = 'site_var_hitter')
        with col2:
            # 'Current Slate Overview' was handled below but never offered
            # here, which made that branch unreachable. index=1 keeps
            # 'Active Baselines' as the initial selection, as before.
            table_var_hitter = st.selectbox('Table', ['Current Slate Overview', 'Active Baselines', 'League Aggregate Baselines', 'League Short Term Baselines', 'League Long Term Baselines'], index = 1, key = 'table_var_hitter')
        with col3:
            splits_var_hitter = st.selectbox('Splits', ['Overall', 'RHP', 'LHP'], key = 'splits_var_hitter')

    if table_var_hitter == 'Current Slate Overview':
        # The hitters overview shows the slate hitters frame; the original
        # branch rendered starting_pitchers here.
        # NOTE(review): confirm slate_hitters is the intended overview table.
        st.dataframe(slate_hitters)
    elif table_var_hitter == 'Active Baselines':
        st.dataframe(hitter_info)
    elif table_var_hitter == 'League Aggregate Baselines':
        st.dataframe(hitter_agg)
    elif table_var_hitter == 'League Short Term Baselines':
        st.dataframe(hitter_short)
    elif table_var_hitter == 'League Long Term Baselines':
        st.dataframe(hitter_long)
|
145 |
+
|
146 |
+
with team_tab:
    # Display controls are grouped inside a collapsible expander; the third
    # column is created but left empty, as in the original layout.
    with st.expander('Info and Display Options'):
        col1, col2, col3 = st.columns(3)
        with col1:
            site_var_team= st.selectbox('Site', ['DraftKings', 'FanDuel'], key = 'site_var_team')
        with col2:
            table_var_team = st.selectbox('Table', ['Team Baselines', 'Bullpen Baselines'], key = 'table_var_team')

    # Table label -> frame to render.
    team_tables = {
        'Team Baselines': slate_team,
        'Bullpen Baselines': bp_data,
    }
    selected_team_table = team_tables.get(table_var_team)
    if selected_team_table is not None:
        st.dataframe(selected_team_table)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|