Spaces:
Sleeping
Sleeping
File size: 8,674 Bytes
e6aed42 5f38c80 e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df 404a89f e6aed42 aad53df e6aed42 aad53df e6aed42 71c1236 0197b24 5f38c80 0197b24 5f38c80 0197b24 aad53df fd855e0 aad53df e6aed42 404a89f aad53df ad48ab1 aad53df fd855e0 c4ddc6d 404a89f c4ddc6d 404a89f c4ddc6d 404a89f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
import streamlit as st
st.set_page_config(layout="wide")
import pandas as pd
import gspread
import pymongo
import time
import numpy as np
from scipy.stats import poisson
@st.cache_resource
def init_conn():
    """Create the shared gspread client and return it with the workbook URL.

    The Google service-account credentials are read from Streamlit secrets
    under the ``gcp_service_account`` key rather than being embedded in the
    source.  The secret may be either a TOML table holding the fields of the
    service-account JSON file, or a single string containing that JSON.

    Returns:
        tuple: ``(gc_con, MLB_Data)`` -- the authorised gspread client and the
        URL of the MLB data spreadsheet.

    Raises:
        KeyError: if the ``gcp_service_account`` secret is missing (the exact
            exception type may vary by Streamlit version -- TODO confirm).
    """
    import json

    # SECURITY NOTE(review): a service-account private key used to be
    # hard-coded here.  Anything committed to the repository history must be
    # treated as compromised: revoke that key in the Google Cloud console and
    # issue a new one that lives only in the secrets store.
    scope = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive',
    ]

    secret = st.secrets["gcp_service_account"]
    # Some hosts can only store flat string secrets; accept a JSON blob too.
    credentials = json.loads(secret) if isinstance(secret, str) else dict(secret)

    MLB_Data = 'https://docs.google.com/spreadsheets/d/1f42Ergav8K1VsOLOK9MUn7DM_MLMvv4GR2Fy7EfnZTc/edit#gid=340831852'
    gc_con = gspread.service_account_from_dict(credentials, scope)
    return gc_con, MLB_Data


# Module-level handles used by the data loaders below.
gcservice_account, MLB_Data = init_conn()
def _load_sheet(spreadsheet, title, required_col, sort_col, dedupe_col=None):
    """Read one worksheet into a cleaned DataFrame sorted descending by *sort_col*.

    Rows with no value in *required_col* are dropped.  When *dedupe_col* is
    given, only the first row for each value of that column is kept.
    """
    frame = pd.DataFrame(spreadsheet.worksheet(title).get_all_records())
    # Blank cells arrive as empty strings; convert them to NaN so that
    # dropna() can filter on them.
    frame = frame.replace('', np.nan)
    frame = frame.dropna(subset=[required_col])
    if dedupe_col is not None:
        frame = frame.drop_duplicates(subset=[dedupe_col], keep='first')
    return frame.sort_values(by=sort_col, ascending=False)


@st.cache_data(ttl = 599)
def init_baselines():
    """Load the hitter, pitcher and bullpen baseline tables from the workbook.

    Uses the module-level ``gcservice_account`` client and ``MLB_Data`` URL.
    The result is cached by Streamlit for 599 seconds.

    Returns:
        tuple: ``(hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh,
        bullpen_data)`` as pandas DataFrames.
    """
    sh = gcservice_account.open_by_url(MLB_Data)

    # A short pause separates consecutive worksheet reads -- presumably to
    # stay under the Sheets API rate limit (TODO confirm).
    hitter_rhp = _load_sheet(sh, 'Hitter_Data (RHP)', 'PA', 'Player', dedupe_col='Player')
    time.sleep(.5)

    hitter_lhp = _load_sheet(sh, 'Hitter_Data (LHP)', 'PA', 'Player', dedupe_col='Player')
    time.sleep(.5)

    pitcher_rhh = _load_sheet(sh, 'Pitcher_Data (RHH)', 'True AVG', 'Names')
    time.sleep(.5)

    pitcher_lhh = _load_sheet(sh, 'Pitcher_Data (LHH)', 'True AVG', 'Names')
    time.sleep(.5)

    bullpen_data = _load_sheet(sh, 'Bullpen_xData', 'HWS Ratio', 'Names')

    return hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data
@st.cache_resource
def calc_poisson(hitter_val, sp_val, bp_val, sp_count, bp_count):
    """Simulate expected event counts against a starter and a bullpen.

    The hitter's per-PA rate is averaged with the starting pitcher's rate and,
    separately, with the bullpen's rate.  Each blended rate is multiplied by
    the matching plate-appearance count to form a Poisson mean, 10,000 draws
    are taken from each distribution, and the sample means are reported.

    Args:
        hitter_val: hitter's per-PA rate for the chosen stat.
        sp_val: starting pitcher's per-PA rate.
        bp_val: bullpen's per-PA rate.
        sp_count: plate appearances against the starter.
        bp_count: plate appearances against the bullpen.

    Returns:
        tuple: ``(sp_outcome, bp_outcome, mean_outcome)`` where the first two
        are the sample means of the starter and bullpen draws and the third
        is their sum.
    """
    n_draws = 10000

    # Blend the hitter's rate with each opposing rate, then scale by exposure.
    starter_mean = ((hitter_val + sp_val) / 2) * sp_count
    bullpen_mean = ((hitter_val + bp_val) / 2) * bp_count

    # Starter draws are taken first, then bullpen draws.
    starter_draws = poisson.rvs(starter_mean, size=n_draws)
    bullpen_draws = poisson.rvs(bullpen_mean, size=n_draws)

    sp_outcome = np.mean(starter_draws)
    bp_outcome = np.mean(bullpen_draws)
    return sp_outcome, bp_outcome, sp_outcome + bp_outcome
# ---------------------------------------------------------------------------
# Page body: the left column collects the matchup inputs, the right column
# runs the simulation and shows the selected rows alongside the results.
# ---------------------------------------------------------------------------
hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data = init_baselines()

col1, col2 = st.columns([1, 7])

with col1:
    if st.button("Load/Reset Data", key='reset1'):
        # Drop the cached tables and pull fresh copies from the workbook.
        st.cache_data.clear()
        hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data = init_baselines()

    pitcher_var1 = st.selectbox("Which pitcher are you looking at?", options = pitcher_rhh['Names'].unique())
    pitcher_check = pitcher_rhh[pitcher_rhh['Names'] == pitcher_var1]
    pitcher_hand = pitcher_check['Hand'].iloc[0]

    # Use the hitter table that matches the selected pitcher's handedness.
    hitter_pool = hitter_rhp if pitcher_hand == 'RHP' else hitter_lhp
    hitter_var1 = st.selectbox("What hitter are you looking at?", options = hitter_pool['Player'].unique())
    hitter_check = hitter_pool[hitter_pool['Player'] == hitter_var1]

    bullpen_var1 = st.selectbox("Which Bullpen are you looking at?", options = bullpen_data['Names'].unique())
    bullpen_check = bullpen_data[bullpen_data['Names'] == bullpen_var1]

    # min_value=0: a negative PA count would yield a negative Poisson mean,
    # which scipy rejects with a ValueError.
    sp_count = st.number_input("How many PA against the Pitcher?", min_value = 0, step = 1)
    bp_count = st.number_input("How many PA against the Bullpen?", min_value = 0, step = 1)

    stat_var1 = st.selectbox("What Stat are you looking at?", options = ['Projected Walks', 'Projected Strikeouts', 'Projected HRs'])

with col2:
    if st.button('calculate theoretical means'):
        # Pull the per-PA rate for the chosen stat from each selected row.
        if stat_var1 == 'Projected Walks':
            hitter_val = hitter_check['BB%'].iloc[0]
            sp_val = pitcher_check['BB%'].iloc[0]
            bp_val = bullpen_check['Walkper'].iloc[0] / 100
        elif stat_var1 == 'Projected Strikeouts':
            hitter_val = hitter_check['K%'].iloc[0]
            sp_val = pitcher_check['K%'].iloc[0]
            bp_val = bullpen_check['Strikeoutper'].iloc[0] / 100
        elif stat_var1 == 'Projected HRs':
            hitter_val = hitter_check['xHRs'].iloc[0] / hitter_check['PA'].iloc[0]
            sp_val = pitcher_check['xHR/PA'].iloc[0]
            bp_val = bullpen_check['Homeruns'].iloc[0] / bullpen_check['PA'].iloc[0]

        value = calc_poisson(hitter_val, sp_val, bp_val, sp_count, bp_count)

        st.table(hitter_check)
        st.write(f"Theoretical mean of the SP instances: {value[0]}")
        st.table(pitcher_check)
        st.write(f"Theoretical mean of the BP instances: {value[1]}")
        st.table(bullpen_check)
        st.write(f"Sample mean from generated data: {value[2]}")