Spaces:
Sleeping
Sleeping
File size: 6,880 Bytes
e6aed42 5f38c80 e6aed42 404a89f e6aed42 404a89f e6aed42 71c1236 0197b24 5f38c80 0197b24 5f38c80 0197b24 404a89f e6aed42 404a89f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import streamlit as st
st.set_page_config(layout="wide")
import pandas as pd
import gspread
import pymongo
import time
import numpy as np
from scipy.stats import poisson
@st.cache_resource
def init_conn():
    """Create the Google Sheets client and return it with the workbook URL.

    The service-account credentials are read from Streamlit's secrets store
    (a ``[gcp_service_account]`` section holding the fields of the Google
    service-account JSON key) instead of being embedded in the source, so
    no private key lives in version control.

    Returns:
        tuple: ``(client, url)`` where ``client`` is the authorised gspread
        client and ``url`` is the address of the MLB data workbook.

    Raises:
        Whatever ``st.secrets`` raises when the ``gcp_service_account``
        section is missing (a ``KeyError`` subclass or a secrets-not-found
        error, depending on the Streamlit version).
    """
    # NOTE(review): a service-account key was previously committed in this
    # file; treat it as compromised and rotate it in Google Cloud.
    scope = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive',
    ]

    # st.secrets sections are mapping-like; gspread expects a plain dict.
    credentials = dict(st.secrets["gcp_service_account"])

    MLB_Data = 'https://docs.google.com/spreadsheets/d/1f42Ergav8K1VsOLOK9MUn7DM_MLMvv4GR2Fy7EfnZTc/edit#gid=340831852'

    gc_con = gspread.service_account_from_dict(credentials, scope)
    return gc_con, MLB_Data
# Module-level handles used by the data loaders: the gspread client and the
# workbook URL. init_conn is wrapped in st.cache_resource, so Streamlit reruns
# of this script reuse the same client instead of re-authenticating.
gcservice_account, MLB_Data = init_conn()
def _load_sheet(sh, sheet_name, required_col, dedupe_col=None):
    """Read one worksheet into a cleaned DataFrame.

    Args:
        sh: Open gspread spreadsheet to read from.
        sheet_name: Title of the worksheet to load.
        required_col: Column that must be populated; rows where it is blank
            are dropped.
        dedupe_col: Optional column to de-duplicate on, keeping the first
            occurrence. ``None`` skips de-duplication.

    Returns:
        pandas.DataFrame: The worksheet records with empty strings turned
        into NaN and the row filters above applied.
    """
    frame = pd.DataFrame(sh.worksheet(sheet_name).get_all_records())
    # Blank cells arrive as '' from get_all_records; convert them to NaN so
    # dropna can filter on them.
    frame = frame.replace('', np.nan)
    frame = frame.dropna(subset=[required_col])
    if dedupe_col is not None:
        frame = frame.drop_duplicates(subset=[dedupe_col], keep='first')
    return frame


@st.cache_data(ttl = 599)
def init_baselines():
    """Load the four baseline tables from the MLB workbook.

    Uses the module-level ``gcservice_account`` client and ``MLB_Data`` URL.
    The result is cached by Streamlit for 599 seconds.

    Returns:
        tuple: ``(hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh)``
        DataFrames. Hitter tables keep rows with a populated ``PA`` and one
        row per ``Player``; pitcher tables keep rows with a populated
        ``True AVG`` and are not de-duplicated.
    """
    sh = gcservice_account.open_by_url(MLB_Data)

    # (worksheet title, required column, de-duplication column)
    sheet_specs = (
        ('Hitter_Data (RHP)', 'PA', 'Player'),
        ('Hitter_Data (LHP)', 'PA', 'Player'),
        ('Pitcher_Data (RHH)', 'True AVG', None),
        ('Pitcher_Data (LHH)', 'True AVG', None),
    )

    frames = []
    for position, (sheet_name, required_col, dedupe_col) in enumerate(sheet_specs):
        if position:
            # Short pause between consecutive reads -- presumably to stay
            # under the Sheets API rate limit (TODO confirm).
            time.sleep(.5)
        frames.append(_load_sheet(sh, sheet_name, required_col, dedupe_col))

    hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh = frames
    return hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh
@st.cache_data
def calc_poisson(hitter_val, sp_val, sp_count, bp_count, bp_val=.085, n_samples=10000):
    """Simulate expected event counts against a starter and a bullpen.

    The per-plate-appearance rate against each opponent is the plain average
    of the hitter's rate and the opponent's rate. Each rate is multiplied by
    the number of plate appearances to get a Poisson mean, and the reported
    outcome is the mean of ``n_samples`` Poisson draws with that mean.

    The function returns plain numbers, so it is cached with
    ``st.cache_data`` (``st.cache_resource`` is intended for shared,
    unserialisable resources such as connections). Because of the cache,
    calling again with identical arguments returns the first simulated
    values rather than a fresh draw.

    Args:
        hitter_val: Hitter's per-PA rate for the event.
        sp_val: Starting pitcher's per-PA rate for the event.
        sp_count: Plate appearances against the starting pitcher.
        bp_count: Plate appearances against the bullpen.
        bp_val: Per-PA rate used for the bullpen. Defaults to .085.
        n_samples: Number of Poisson draws per simulation. Defaults to 10000.

    Returns:
        tuple: ``(sp_outcome, bp_outcome, mean_outcome)`` -- the sample mean
        against the starter, the sample mean against the bullpen, and their
        sum.
    """
    sp_combo_val = (hitter_val + sp_val) / 2
    bp_combo_val = (hitter_val + bp_val) / 2

    sp_mean = sp_combo_val * sp_count
    bp_mean = bp_combo_val * bp_count

    # Draw from each Poisson distribution and summarise with the sample mean.
    sp_outcome = np.mean(poisson.rvs(sp_mean, size=n_samples))
    bp_outcome = np.mean(poisson.rvs(bp_mean, size=n_samples))

    mean_outcome = sp_outcome + bp_outcome
    return sp_outcome, bp_outcome, mean_outcome
# ---------------------------------------------------------------------------
# Page body: the left column collects the matchup inputs, the right column
# runs the simulation and shows the results.
# ---------------------------------------------------------------------------
hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh = init_baselines()

col1, col2 = st.columns([1, 7])

with col1:
    # Drop the cached sheet data and pull it again on demand.
    if st.button("Load/Reset Data", key='reset1'):
        st.cache_data.clear()
        hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh = init_baselines()

    pitcher_var1 = st.selectbox("Which pitcher are you looking at?", options = pitcher_rhh['Names'].unique())
    working_pitcher = pitcher_rhh.copy()
    pitcher_check = working_pitcher[working_pitcher['Names'] == pitcher_var1]
    if pitcher_check.empty:
        # Without this guard .iloc[0] raises IndexError when the sheet
        # yields no pitchers.
        st.warning("No pitcher data is available.")
        st.stop()
    pitcher_hand = pitcher_check['Hand'].iloc[0]

    # Hitters are looked up in the split that matches the pitcher's hand;
    # anything other than 'RHP' falls back to the LHP table.
    working_hitters = (hitter_rhp if pitcher_hand == 'RHP' else hitter_lhp).copy()
    hitter_var1 = st.selectbox("What hitter are you looking at?", options = working_hitters['Player'].unique())
    hitter_check = working_hitters[working_hitters['Player'] == hitter_var1]
    if hitter_check.empty:
        st.warning("No hitter data is available.")
        st.stop()

    # Plate-appearance counts become Poisson means, which must not be
    # negative, so the inputs are bounded below at zero.
    sp_count = st.number_input("How many PA against the Pitcher?", min_value=0.0)
    bp_count = st.number_input("How many PA against the Bullpen?", min_value=0.0)

with col2:
    if st.button('calculate theoretical means'):
        hitter_val = hitter_check['BB%'].iloc[0]
        sp_val = pitcher_check['BB%'].iloc[0]
        value = calc_poisson(hitter_val, sp_val, sp_count, bp_count)
        st.write(f"Theoretical mean of the SP instances: {value[0]}")
        st.write(f"Theoretical mean of the BP instances: {value[1]}")
        st.write(f"Sample mean from generated data: {value[2]}")