Spaces:
Sleeping
Sleeping
File size: 8,674 Bytes
e6aed42 5f38c80 e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df e6aed42 aad53df 404a89f e6aed42 aad53df e6aed42 aad53df e6aed42 71c1236 0197b24 5f38c80 0197b24 5f38c80 0197b24 aad53df fd855e0 aad53df e6aed42 404a89f aad53df ad48ab1 aad53df fd855e0 c4ddc6d 404a89f c4ddc6d 404a89f c4ddc6d 404a89f |
|
import streamlit as st
st.set_page_config(layout="wide")
import pandas as pd
import gspread
import pymongo
import time
import numpy as np
from scipy.stats import poisson
@st.cache_resource
def init_conn():
    """Create the gspread client used to read the MLB model workbook.

    The Google service-account credentials are read from Streamlit's
    secrets store under the ``gcp_service_account`` table rather than being
    embedded in the source file.

    Returns:
        tuple: ``(gc_con, MLB_Data)`` where ``gc_con`` is the client returned
        by ``gspread.service_account_from_dict`` and ``MLB_Data`` is the URL
        of the workbook.

    Raises:
        KeyError: if the ``gcp_service_account`` entry is not configured in
            the app's secrets.
    """
    scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']

    # SECURITY: the service-account JSON (including its private key) must
    # not live in source control. Configure it as a [gcp_service_account]
    # table in .streamlit/secrets.toml (or the hosting platform's secrets UI).
    # NOTE(review): any key that was previously committed to this file should
    # be treated as compromised and rotated.
    credentials = dict(st.secrets["gcp_service_account"])

    MLB_Data = 'https://docs.google.com/spreadsheets/d/1f42Ergav8K1VsOLOK9MUn7DM_MLMvv4GR2Fy7EfnZTc/edit#gid=340831852'
    gc_con = gspread.service_account_from_dict(credentials, scope)
    return gc_con, MLB_Data


# Module-level handles shared by the data loaders below.
gcservice_account, MLB_Data = init_conn()
def _load_sheet(sh, title, required_col, sort_col, dedupe_col=None):
    """Read one worksheet into a cleaned, descending-sorted DataFrame.

    Blank cells are converted to NaN, rows missing ``required_col`` are
    dropped, duplicates on ``dedupe_col`` (when given) are removed keeping
    the first occurrence, and the result is sorted descending by ``sort_col``.
    """
    frame = pd.DataFrame(sh.worksheet(title).get_all_records())
    frame = frame.replace('', np.nan)
    frame = frame.dropna(subset=[required_col])
    if dedupe_col is not None:
        frame = frame.drop_duplicates(subset=[dedupe_col], keep='first')
    return frame.sort_values(by=sort_col, ascending=False)


@st.cache_data(ttl = 599)
def init_baselines():
    """Load the five baseline tables from the MLB workbook.

    The result is cached by Streamlit for 599 seconds.

    Returns:
        tuple: ``(hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh,
        bullpen_data)`` as pandas DataFrames, in that order.
    """
    sh = gcservice_account.open_by_url(MLB_Data)

    # (worksheet title, column that must be non-empty, sort column, dedupe column)
    sheet_specs = (
        ('Hitter_Data (RHP)', 'PA', 'Player', 'Player'),
        ('Hitter_Data (LHP)', 'PA', 'Player', 'Player'),
        ('Pitcher_Data (RHH)', 'True AVG', 'Names', None),
        ('Pitcher_Data (LHH)', 'True AVG', 'Names', None),
        ('Bullpen_xData', 'HWS Ratio', 'Names', None),
    )

    frames = []
    for position, (title, required_col, sort_col, dedupe_col) in enumerate(sheet_specs):
        if position:
            # Pause between consecutive worksheet reads
            # (presumably to stay under the Sheets API rate limit - confirm).
            time.sleep(.5)
        frames.append(_load_sheet(sh, title, required_col, sort_col, dedupe_col))

    return tuple(frames)
# st.cache_data (not st.cache_resource) is the cache intended for functions
# that compute and return plain data; cache_resource is for shared objects
# such as connections.
@st.cache_data
def calc_poisson(hitter_val, sp_val, bp_val, sp_count, bp_count):
    """Simulate expected event counts against a starter and a bullpen.

    The hitter's rate is averaged with the starter's rate and with the
    bullpen's rate; each averaged rate is multiplied by the matching number
    of plate appearances to obtain a Poisson mean. 10,000 Poisson samples are
    drawn for each and their sample means are returned.

    Because the function is cached, calling it again with the same arguments
    returns the previously sampled values instead of drawing new ones.

    Args:
        hitter_val: per-PA rate for the hitter.
        sp_val: per-PA rate for the starting pitcher.
        bp_val: per-PA rate for the bullpen.
        sp_count: number of plate appearances against the starter.
        bp_count: number of plate appearances against the bullpen.

    Returns:
        tuple: ``(sp_outcome, bp_outcome, mean_outcome)`` where the first two
        are the sample means of the starter and bullpen simulations and the
        third is their sum.
    """
    sp_mean = (hitter_val + sp_val) / 2 * sp_count
    bp_mean = (hitter_val + bp_val) / 2 * bp_count

    # Draw the starter samples first, then the bullpen samples.
    sp_samples = poisson.rvs(sp_mean, size=10000)
    bp_samples = poisson.rvs(bp_mean, size=10000)

    sp_outcome = np.mean(sp_samples)
    bp_outcome = np.mean(bp_samples)
    mean_outcome = sp_outcome + bp_outcome
    return sp_outcome, bp_outcome, mean_outcome
# ---------------------------------------------------------------------------
# Page body: the left column collects the matchup inputs, the right column
# runs the simulation and shows the selected rows alongside the results.
# ---------------------------------------------------------------------------
hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data = init_baselines()

col1, col2 = st.columns([1, 7])

with col1:
    if st.button("Load/Reset Data", key='reset1'):
        # Drop cached data and reload the baseline tables.
        st.cache_data.clear()
        hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data = init_baselines()

    pitcher_var1 = st.selectbox("Which pitcher are you looking at?", options = pitcher_rhh['Names'].unique())
    pitcher_check = pitcher_rhh[pitcher_rhh['Names'] == pitcher_var1]
    pitcher_hand = pitcher_check['Hand'].iloc[0]

    # Offer hitters from the split that matches the pitcher's throwing hand.
    hitter_pool = hitter_rhp if pitcher_hand == 'RHP' else hitter_lhp
    hitter_var1 = st.selectbox("What hitter are you looking at?", options = hitter_pool['Player'].unique())
    hitter_check = hitter_pool[hitter_pool['Player'] == hitter_var1]

    bullpen_var1 = st.selectbox("Which Bullpen are you looking at?", options = bullpen_data['Names'].unique())
    bullpen_check = bullpen_data[bullpen_data['Names'] == bullpen_var1]

    # min_value=0 keeps the plate-appearance counts non-negative; a negative
    # count would produce a negative Poisson mean in calc_poisson.
    sp_count = st.number_input("How many PA against the Pitcher?", min_value = 0, step = 1)
    bp_count = st.number_input("How many PA against the Bullpen?", min_value = 0, step = 1)

    stat_var1 = st.selectbox("What Stat are you looking at?", options = ['Projected Walks', 'Projected Strikeouts', 'Projected HRs'])

with col2:
    if st.button('calculate theoretical means'):
        # Pick the per-PA rates for the chosen stat from the selected rows.
        if stat_var1 == 'Projected Walks':
            hitter_val = hitter_check['BB%'].iloc[0]
            sp_val = pitcher_check['BB%'].iloc[0]
            bp_val = bullpen_check['Walkper'].iloc[0] / 100
        elif stat_var1 == 'Projected Strikeouts':
            hitter_val = hitter_check['K%'].iloc[0]
            sp_val = pitcher_check['K%'].iloc[0]
            bp_val = bullpen_check['Strikeoutper'].iloc[0] / 100
        elif stat_var1 == 'Projected HRs':
            hitter_val = hitter_check['xHRs'].iloc[0] / hitter_check['PA'].iloc[0]
            sp_val = pitcher_check['xHR/PA'].iloc[0]
            bp_val = bullpen_check['Homeruns'].iloc[0] / bullpen_check['PA'].iloc[0]

        # value is (starter sample mean, bullpen sample mean, their sum).
        value = calc_poisson(hitter_val, sp_val, bp_val, sp_count, bp_count)

        st.table(hitter_check)
        st.write(f"Theoretical mean of the SP instances: {value[0]}")
        st.table(pitcher_check)
        st.write(f"Theoretical mean of the BP instances: {value[1]}")
        st.table(bullpen_check)
        st.write(f"Sample mean from generated data: {value[2]}")