# Multichem's picture
# Update app.py
# c4ddc6d verified
import streamlit as st
st.set_page_config(layout="wide")
import pandas as pd
import gspread
import pymongo
import time
import numpy as np
from scipy.stats import poisson
@st.cache_resource
def init_conn():
    """Create the shared gspread client and return it with the workbook URL.

    The Google service-account credentials are read from Streamlit secrets
    (the ``gcp_service_account`` table, e.g. in ``.streamlit/secrets.toml``)
    rather than being embedded in this file, so the private key is never
    stored in version control. Streamlit raises an error if that secret is
    not configured.

    SECURITY: a service-account key was previously hard-coded here. Any key
    that has been committed must be treated as compromised and
    revoked/rotated in the Google Cloud console.

    Returns:
        tuple: ``(gc_con, MLB_Data)`` -- the authorised gspread client and
        the URL of the MLB data spreadsheet.
    """
    scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']

    # Copy the secrets section into a plain dict of service-account fields
    # ("type", "project_id", "private_key", "client_email", ...).
    credentials = dict(st.secrets["gcp_service_account"])

    MLB_Data = 'https://docs.google.com/spreadsheets/d/1f42Ergav8K1VsOLOK9MUn7DM_MLMvv4GR2Fy7EfnZTc/edit#gid=340831852'

    gc_con = gspread.service_account_from_dict(credentials, scope)
    return gc_con, MLB_Data
# Module-level gspread client and workbook URL; init_baselines() reads both.
gcservice_account, MLB_Data = init_conn()
def _load_sheet(sh, title, required_col, sort_col, dedupe_col=None):
    """Read one worksheet into a cleaned, sorted DataFrame.

    Blank cells become NaN, rows with no value in ``required_col`` are
    dropped, rows are optionally de-duplicated on ``dedupe_col`` (keeping the
    first occurrence), and the result is sorted by ``sort_col`` descending.
    """
    frame = pd.DataFrame(sh.worksheet(title).get_all_records())
    frame = frame.replace('', np.nan)
    frame = frame.dropna(subset=[required_col])
    if dedupe_col is not None:
        frame = frame.drop_duplicates(subset=[dedupe_col], keep='first')
    return frame.sort_values(by=sort_col, ascending=False)


@st.cache_data(ttl = 599)
def init_baselines():
    """Load the five baseline tables from the MLB spreadsheet.

    Uses the module-level ``gcservice_account`` client and ``MLB_Data`` URL.
    The result is cached by Streamlit (``ttl=599``).

    Returns:
        tuple: ``(hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh,
        bullpen_data)`` as pandas DataFrames.
    """
    sh = gcservice_account.open_by_url(MLB_Data)

    # (worksheet title, column that must be populated, sort column, de-dupe column)
    sheet_specs = (
        ('Hitter_Data (RHP)', 'PA', 'Player', 'Player'),
        ('Hitter_Data (LHP)', 'PA', 'Player', 'Player'),
        ('Pitcher_Data (RHH)', 'True AVG', 'Names', None),
        ('Pitcher_Data (LHH)', 'True AVG', 'Names', None),
        ('Bullpen_xData', 'HWS Ratio', 'Names', None),
    )

    frames = []
    for position, (title, required_col, sort_col, dedupe_col) in enumerate(sheet_specs):
        if position:
            # Short pause between consecutive worksheet reads -- presumably to
            # stay under the Sheets API rate limit; confirm before removing.
            time.sleep(.5)
        frames.append(_load_sheet(sh, title, required_col, sort_col, dedupe_col))

    hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data = frames
    return hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data
@st.cache_data
def calc_poisson(hitter_val, sp_val, bp_val, sp_count, bp_count):
    """Simulate expected event counts against a starter and a bullpen.

    For each matchup the per-instance rate is the plain average of the
    hitter's rate and the opposing rate. Multiplying that rate by the number
    of instances gives a Poisson mean, from which 10,000 samples are drawn
    and averaged.

    The function returns plain numeric data, so it is cached with
    ``st.cache_data``; ``st.cache_resource`` is intended for shared,
    unserialisable objects such as connections. Because the result is cached,
    repeated calls with identical arguments return the same sampled values.

    Args:
        hitter_val: the hitter's per-instance rate for the chosen stat.
        sp_val: the starting pitcher's per-instance rate.
        bp_val: the bullpen's per-instance rate.
        sp_count: number of instances against the starting pitcher.
        bp_count: number of instances against the bullpen.

    Returns:
        tuple: ``(sp_outcome, bp_outcome, mean_outcome)`` -- the sample mean
        against the starter, the sample mean against the bullpen, and their
        sum.
    """
    # Blend hitter and opponent rates, then scale by the number of instances.
    sp_mean = (hitter_val + sp_val) / 2 * sp_count
    bp_mean = (hitter_val + bp_val) / 2 * bp_count

    # Generate a large number of samples from the Poisson distribution
    SP_run = poisson.rvs(sp_mean, size=10000)
    BP_run = poisson.rvs(bp_mean, size=10000)

    # Calculate the sample mean
    sp_outcome = np.mean(SP_run)
    bp_outcome = np.mean(BP_run)
    mean_outcome = sp_outcome + bp_outcome

    return sp_outcome, bp_outcome, mean_outcome
# Load the cached baseline tables for this script run.
hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data = init_baselines()

# Narrow left column for the inputs, wide right column for the results.
col1, col2 = st.columns([1, 7])

with col1:
    # Clearing the data cache forces init_baselines() to re-read the sheets.
    if st.button("Load/Reset Data", key='reset1'):
        st.cache_data.clear()
        hitter_rhp, hitter_lhp, pitcher_rhh, pitcher_lhh, bullpen_data = init_baselines()

    # NOTE(review): only pitcher_rhh feeds the pitcher lookup; pitcher_lhh is
    # loaded but never used in the code visible here -- confirm this is intended.
    pitcher_var1 = st.selectbox("Which pitcher are you looking at?", options = pitcher_rhh['Names'].unique())
    working_pitcher = pitcher_rhh.copy()
    pitcher_check = working_pitcher[working_pitcher['Names'] == pitcher_var1]
    pitcher_hand = pitcher_check['Hand'].iloc[0]

    # Use the hitter split that matches the selected pitcher's hand.
    working_hitters = (hitter_rhp if pitcher_hand == 'RHP' else hitter_lhp).copy()
    hitter_var1 = st.selectbox("What hitter are you looking at?", options = working_hitters['Player'].unique())
    hitter_check = working_hitters[working_hitters['Player'] == hitter_var1]

    bullpen_var1 = st.selectbox("Which Bullpen are you looking at?", options = bullpen_data['Names'].unique())
    working_bullpen = bullpen_data.copy()
    bullpen_check = working_bullpen[working_bullpen['Names'] == bullpen_var1]

    # min_value=0: a negative PA count would give a negative Poisson mean,
    # which scipy rejects when sampling.
    sp_count = st.number_input("How many PA against the Pitcher?", min_value = 0, step = 1)
    bp_count = st.number_input("How many PA against the Bullpen?", min_value = 0, step = 1)

    stat_var1 = st.selectbox("What Stat are you looking at?", options = ['Projected Walks', 'Projected Strikeouts', 'Projected HRs'])

with col2:
    if st.button('calculate theoretical means'):
        # Pick the per-PA rates for the chosen stat. The bullpen sheet's
        # 'Walkper' / 'Strikeoutper' columns are divided by 100 here, while
        # HR rates are derived from counts divided by PA.
        if stat_var1 == 'Projected Walks':
            hitter_val = hitter_check['BB%'].iloc[0]
            sp_val = pitcher_check['BB%'].iloc[0]
            bp_val = bullpen_check['Walkper'].iloc[0] / 100
        elif stat_var1 == 'Projected Strikeouts':
            hitter_val = hitter_check['K%'].iloc[0]
            sp_val = pitcher_check['K%'].iloc[0]
            bp_val = bullpen_check['Strikeoutper'].iloc[0] / 100
        elif stat_var1 == 'Projected HRs':
            hitter_val = hitter_check['xHRs'].iloc[0] / hitter_check['PA'].iloc[0]
            sp_val = pitcher_check['xHR/PA'].iloc[0]
            bp_val = bullpen_check['Homeruns'].iloc[0] / bullpen_check['PA'].iloc[0]

        # value is (sp_outcome, bp_outcome, mean_outcome) from calc_poisson.
        value = calc_poisson(hitter_val, sp_val, bp_val, sp_count, bp_count)

        st.table(hitter_check)
        st.write(f"Theoretical mean of the SP instances: {value[0]}")
        st.table(pitcher_check)
        st.write(f"Theoretical mean of the BP instances: {value[1]}")
        st.table(bullpen_check)
        st.write(f"Sample mean from generated data: {value[2]}")