# MLB_Gamelogs / app.py
# Multichem's picture
# Update app.py
# 0c8a239 verified
import streamlit as st
# Configure the page to use the wide layout before anything else is drawn.
st.set_page_config(layout="wide")

# Remove every module-level name that does not start with an underscore.
# This also deletes `st`, which is why the imports are repeated right after
# this loop. `dir()` is evaluated once, so deleting entries while looping
# is safe.
for name in dir():
    if name.startswith('_'):
        continue
    del globals()[name]
import numpy as np
import pandas as pd
import streamlit as st
import gspread
import pymongo
import time
from io import BytesIO
from pymongo.mongo_client import MongoClient
import matplotlib.pyplot as plt
import certifi
# Location of certifi's CA bundle. Not referenced again in the visible part
# of this file -- NOTE(review): presumably meant to be passed to the MongoDB
# client as its CA file; confirm before removing.
ca = certifi.where()
@st.cache_resource
def init_conn():
    """Create the Google Sheets client and the MongoDB client/database.

    The function is wrapped in ``st.cache_resource`` and takes no arguments,
    so Streamlit hands back the cached connection objects on later calls
    instead of reconnecting.

    SECURITY: credentials are no longer embedded in the source. The
    service-account private key and the MongoDB connection string that used
    to live in this function were published together with the code and must
    be treated as compromised -- rotate both.

    Configuration (environment variable first, then ``st.secrets``):
        * ``GCP_SERVICE_ACCOUNT_JSON`` -- the full service-account key file
          as a JSON string, or a ``[gcp_service_account]`` table in the
          Streamlit secrets holding the same fields.
        * ``MONGO_URI`` -- the MongoDB connection string, or a ``mongo_uri``
          entry in the Streamlit secrets.

    Returns:
        tuple: ``(gc_con, client, db)`` -- the gspread client, the
        ``pymongo.MongoClient`` and its ``"testing_db"`` database.

    Raises:
        json.JSONDecodeError: if ``GCP_SERVICE_ACCOUNT_JSON`` is set but is
            not valid JSON.
        Exception: whatever ``st.secrets`` raises for a missing entry when
            neither source provides a value.
    """
    # Local imports keep this block self-contained; the file's import
    # section does not bring in json/os.
    import json
    import os

    scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']

    raw_credentials = os.environ.get("GCP_SERVICE_ACCOUNT_JSON")
    if raw_credentials:
        credentials = json.loads(raw_credentials)
    else:
        # Copy into a plain dict, which is what gspread is given below.
        credentials = dict(st.secrets["gcp_service_account"])

    uri = os.environ.get("MONGO_URI") or st.secrets["mongo_uri"]

    client = pymongo.MongoClient(uri, retryWrites=True, serverSelectionTimeoutMS=100000)
    db = client["testing_db"]

    gc_con = gspread.service_account_from_dict(credentials, scope)

    return gc_con, client, db
# Shared handles used by the rest of the script: gspread client, MongoDB
# client and the MongoDB database returned by init_conn().
gcservice_account, client, db = init_conn()

# Two-decimal percentage format string for each listed column label.
# NOTE(review): not referenced in the visible part of this file -- confirm
# it is still needed.
percentages_format = dict.fromkeys(('PG', 'SG', 'SF', 'PF', 'C'), '{:.2%}')
def _load_gamelog(collection_name, source_cols, display_cols, blanks_to_nan=False):
    """Read one MongoDB collection into a cleaned game-log DataFrame.

    Args:
        collection_name: name of the collection in the module-level ``db``.
        source_cols: columns to keep, named as stored in the collection.
            Must include 'NameASCII', 'Team' and 'Date'.
        display_cols: replacement column names, same length and order as
            ``source_cols``.
        blanks_to_nan: when True, empty strings anywhere in the kept columns
            are replaced by NaN before any conversion.

    Returns:
        pandas.DataFrame: 'Date' holds ``datetime.date`` values, every column
        other than name/team/date is numeric (unparseable values become
        NaN), and the columns are renamed to ``display_cols``.
    """
    # find() without a filter returns every document in the collection.
    raw_display = pd.DataFrame(list(db[collection_name].find()))

    # Explicit copy: the assignments below must write to an independent
    # frame, not to a selection of raw_display (avoids chained-assignment
    # warnings and ambiguity).
    gamelog_table = raw_display[source_cols].copy()
    if blanks_to_nan:
        gamelog_table = gamelog_table.replace("", np.nan)

    gamelog_table['Date'] = pd.to_datetime(gamelog_table['Date']).dt.date

    data_cols = gamelog_table.columns.drop(['NameASCII', 'Team', 'Date'])
    gamelog_table[data_cols] = gamelog_table[data_cols].apply(pd.to_numeric, errors='coerce')

    return gamelog_table.set_axis(display_cols, axis=1)


@st.cache_resource(ttl = 599)
def init_baselines():
    """Load the hitter and pitcher game logs from MongoDB.

    Cached by Streamlit with a ttl of 599.
    NOTE(review): ``st.cache_resource`` returns the cached objects
    themselves, so any in-place change made by a caller would be seen by
    every later caller until the entry expires -- confirm callers treat the
    tables as read-only.

    Returns:
        tuple: ``(hitter_gamelog_table, pitcher_gamelog_table, timestamp)``
        where ``timestamp`` is the largest 'Date' in the pitcher table.
    """
    hitter_cols = ['NameASCII', 'Team', 'Date', 'G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                   'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%']
    pitcher_cols = ['NameASCII', 'Team', 'Date', 'G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                    'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%',
                    'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA', 'vFA (sc)', 'vFT (sc)', 'vFC (sc)', 'vFS (sc)', 'vFO (sc)', 'vSI (sc)',
                    'vSL (sc)', 'vCU (sc)', 'vKC (sc)', 'vEP (sc)', 'vCH (sc)', 'vSC (sc)', 'vKN (sc)']

    # Display names: 'NameASCII' becomes 'Player'; the pitch-velocity
    # columns additionally lose their ' (sc)' suffix.
    hitter_display = ['Player'] + hitter_cols[1:]
    pitcher_display = ['Player'] + [col.replace(' (sc)', '') for col in pitcher_cols[1:]]

    hitter_gamelog_table = _load_gamelog("MLB_Hitters_DB", hitter_cols, hitter_display)
    # Only the pitcher table has blanks replaced by NaN across all columns,
    # exactly as before; hitter blanks in stat columns still become NaN via
    # the numeric coercion.
    pitcher_gamelog_table = _load_gamelog("MLB_Pitchers_DB", pitcher_cols, pitcher_display, blanks_to_nan=True)

    timestamp = pitcher_gamelog_table['Date'].max()

    return hitter_gamelog_table, pitcher_gamelog_table, timestamp
@st.cache_data(show_spinner=False)
def hitter_seasonlong_build(data_sample):
    """Collapse hitter game logs into one aggregated row per player.

    Args:
        data_sample: DataFrame of hitter game logs with 'Player', 'Team' and
            the per-game stat columns listed in ``stat_order`` below.

    Returns:
        pandas.DataFrame: columns 'Player', 'Team', then one column per stat
        in ``stat_order`` order. Counting stats are summed (int); rate stats
        are the plain mean of the per-game values (float) and carry an
        'Avg ' prefix. Rows keep the index label of the player's first game
        in ``data_sample`` and are sorted by 'Avg wRC+', highest first.

    NOTE(review): totals are computed per (Player, Team) but duplicates are
    dropped on 'Player' alone, so a player listed under two teams keeps only
    the row for the team that appears first -- confirm this is intended.
    """
    group_keys = ['Player', 'Team']
    # Output column order.
    stat_order = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                  'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%']
    # Stats that are averaged instead of summed.
    averaged = {'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrel%'}

    grouped = data_sample.groupby(group_keys, sort=False)

    # .copy() so the new columns are written to an independent frame rather
    # than to a selection of data_sample.
    season_long_table = data_sample[group_keys].copy()
    for stat in stat_order:
        if stat in averaged:
            season_long_table[f'Avg {stat}'] = grouped[stat].transform('mean').astype(float)
        else:
            season_long_table[stat] = grouped[stat].transform('sum').astype(int)

    # transform() repeats the group value on every game row; keep one row
    # per player.
    season_long_table = season_long_table.drop_duplicates(subset='Player')

    return season_long_table.sort_values(by='Avg wRC+', ascending=False)
@st.cache_data(show_spinner=False)
def hitter_team_build(data_sample):
    """Collapse hitter game logs into one aggregated row per team.

    Args:
        data_sample: DataFrame of hitter game logs with 'Team' and the
            per-game stat columns listed in ``stat_order`` below.

    Returns:
        pandas.DataFrame: column 'Team', then one column per stat in
        ``stat_order`` order. Counting stats are summed over every row of
        the team (int); rate stats are the plain mean of those rows (float)
        and carry an 'Avg ' prefix. Rows keep the index label of the team's
        first row in ``data_sample`` and are sorted by 'Avg wRC+', highest
        first.
    """
    group_keys = ['Team']
    # Output column order.
    stat_order = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                  'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%']
    # Stats that are averaged instead of summed.
    averaged = {'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrel%'}

    grouped = data_sample.groupby(group_keys, sort=False)

    # .copy() so the new columns are written to an independent frame rather
    # than to a selection of data_sample.
    season_long_table = data_sample[group_keys].copy()
    for stat in stat_order:
        if stat in averaged:
            season_long_table[f'Avg {stat}'] = grouped[stat].transform('mean').astype(float)
        else:
            season_long_table[stat] = grouped[stat].transform('sum').astype(int)

    # transform() repeats the group value on every row; keep one per team.
    season_long_table = season_long_table.drop_duplicates(subset='Team')

    return season_long_table.sort_values(by='Avg wRC+', ascending=False)
@st.cache_data(show_spinner=False)
def pitcher_seasonlong_build(data_sample):
    """Collapse pitcher game logs into one aggregated row per player.

    Args:
        data_sample: DataFrame of pitcher game logs with 'Player', 'Team'
            and the per-game stat columns listed in ``stat_order`` below.

    Returns:
        pandas.DataFrame: columns 'Player', 'Team', then one column per stat
        in ``stat_order`` order. Stats named in ``summed`` are totalled
        (int); all others are the plain mean of the per-game values (float)
        and carry an 'Avg ' prefix. Rows keep the index label of the
        player's first game in ``data_sample`` and are sorted by 'SO',
        highest first.

    NOTE(review): totals are computed per (Player, Team) but duplicates are
    dropped on 'Player' alone, so a player listed under two teams keeps only
    the row for the team that appears first -- confirm this is intended.
    NOTE(review): the int cast on the 'IP' total drops any fractional part
    -- confirm whole innings are what should be shown.
    """
    group_keys = ['Player', 'Team']
    # Output column order.
    stat_order = ['G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                  'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%',
                  'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA', 'vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI',
                  'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN']
    # Stats that are totalled; everything else in stat_order is averaged.
    summed = {'G', 'GS', 'CG', 'W', 'L', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
              'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'Barrels'}

    grouped = data_sample.groupby(group_keys, sort=False)

    # .copy() so the new columns are written to an independent frame rather
    # than to a selection of data_sample.
    season_long_table = data_sample[group_keys].copy()
    for stat in stat_order:
        if stat in summed:
            season_long_table[stat] = grouped[stat].transform('sum').astype(int)
        else:
            # Every average -- 'LOB%' included -- stays float. Casting a
            # mean to int truncates it and raises when the mean is NaN.
            season_long_table[f'Avg {stat}'] = grouped[stat].transform('mean').astype(float)

    # transform() repeats the group value on every game row; keep one row
    # per player.
    season_long_table = season_long_table.drop_duplicates(subset='Player')

    return season_long_table.sort_values(by='SO', ascending=False)
@st.cache_data(show_spinner=False)
def pitcher_team_build(data_sample):
    """Collapse pitcher game logs into one aggregated row per team.

    Args:
        data_sample: DataFrame of pitcher game logs with 'Team' and the
            per-game stat columns listed in ``stat_order`` below.

    Returns:
        pandas.DataFrame: column 'Team', then one column per stat in
        ``stat_order`` order. Stats named in ``summed`` are totalled over
        every row of the team (int); all others are the plain mean of those
        rows (float) and carry an 'Avg ' prefix. Rows keep the index label
        of the team's first row in ``data_sample`` and are sorted by 'SO',
        highest first.

    NOTE(review): the int cast on the 'IP' total drops any fractional part
    -- confirm whole innings are what should be shown.
    """
    group_keys = ['Team']
    # Output column order.
    stat_order = ['G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                  'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%',
                  'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA', 'vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI',
                  'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN']
    # Stats that are totalled; everything else in stat_order is averaged.
    summed = {'G', 'GS', 'CG', 'W', 'L', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
              'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'Barrels'}

    grouped = data_sample.groupby(group_keys, sort=False)

    # .copy() so the new columns are written to an independent frame rather
    # than to a selection of data_sample.
    season_long_table = data_sample[group_keys].copy()
    for stat in stat_order:
        if stat in summed:
            season_long_table[stat] = grouped[stat].transform('sum').astype(int)
        else:
            # Every average -- 'LOB%' included -- stays float. Casting a
            # mean to int truncates it and raises when the mean is NaN.
            season_long_table[f'Avg {stat}'] = grouped[stat].transform('mean').astype(float)

    # transform() repeats the group value on every row; keep one per team.
    season_long_table = season_long_table.drop_duplicates(subset='Team')

    return season_long_table.sort_values(by='SO', ascending=False)
@st.cache_data(show_spinner=False)
def split_frame(input_df, rows):
    """Split a DataFrame into consecutive chunks of at most ``rows`` rows.

    Chunks are cut by row position, so the result does not depend on the
    frame's index labels. For a frame with the default 0..n-1 index this
    gives the same chunks as slicing by label. For a frame whose index has
    been reordered or thinned out (for example after sorting or dropping
    duplicates), label slicing would return the wrong rows or fail.

    Args:
        input_df: DataFrame to paginate.
        rows: chunk size; used as the step of ``range``, so it must not be 0.

    Returns:
        list: DataFrames in original row order; empty list for an empty
        frame. The last chunk may hold fewer than ``rows`` rows.
    """
    return [input_df.iloc[start:start + rows, :] for start in range(0, len(input_df), rows)]
def convert_df_to_csv(df):
    """Serialize ``df`` to CSV text (index included) and return it as UTF-8 bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
# Load the baseline game-log tables plus the "updated through" timestamp.
hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
t_stamp = f"Updated through: {timestamp!s} CST"

# Identifier columns that are always placed in front of the selected stats.
basic_cols = ['Player', 'Team', 'Date']
basic_season_cols = ['Player', 'Team', 'Date']

# Per-game hitter stats, and the names they carry in the aggregated tables.
hitter_data_cols = [
    'G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
    'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%',
]
season_hitter_data_cols = [
    'G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
    'GDP', 'SB', 'CS', 'Avg AVG', 'Avg SLG', 'Avg wRC+', 'Avg LD%', 'Avg GB%', 'Avg FB%', 'Avg Hard%',
    'Barrels', 'Avg Barrel%',
]

# Per-game pitcher stats, and the names they carry in the aggregated tables.
pitcher_data_cols = [
    'G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
    'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP',
    'K%', 'BB%', 'SIERA', 'LD%', 'GB%', 'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA',
    'vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI', 'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN',
]
season_pitcher_data_cols = [
    'G', 'GS', 'CG', 'W', 'L', 'Avg ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
    'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'Avg K/9', 'Avg BB/9', 'Avg WHIP', 'Avg BABIP', 'Avg LOB%',
    'Avg FIP', 'Avg xFIP', 'Avg K%', 'Avg BB%', 'Avg SIERA', 'Avg LD%', 'Avg GB%', 'Avg FB%',
    'Avg HR/FB', 'Avg Hard%', 'Barrels', 'Avg Barrel%', 'Avg xERA', 'Avg vFA', 'Avg vFT', 'Avg vFC',
    'Avg vFS', 'Avg vFO', 'Avg vSI', 'Avg vSL', 'Avg vCU', 'Avg vKC', 'Avg vEP', 'Avg vCH', 'Avg vSC',
    'Avg vKN',
]

# Option lists for the sidebar-style filters: first occurrence of each value,
# in table order.
indv_teams = hitter_gamelog_table.drop_duplicates(subset='Team')
indv_hitters = hitter_gamelog_table.drop_duplicates(subset='Player')
indv_pitchers = pitcher_gamelog_table.drop_duplicates(subset='Player')
total_teams = indv_teams['Team'].values.tolist()
total_hitters = indv_hitters['Player'].values.tolist()
total_pitchers = indv_pitchers['Player'].values.tolist()
total_dates = hitter_gamelog_table['Date'].values.tolist()
tab1, tab2, tab3 = st.tabs(['Hitter Gamelogs', 'Pitcher Gamelogs', 'Sample Graphs'])

# ---------------------------------------------------------------------------
# Tab 1: hitter season logs, game logs and team logs.
# Left column holds the filters; right column renders the selected table.
# ---------------------------------------------------------------------------
with tab1:
    st.info(t_stamp)
    col1, col2 = st.columns([1, 9])

    with col1:
        if st.button("Reset Data", key='reset1'):
            # Clear cached data and reload. Only values derived from the
            # tables are recomputed; the static column lists defined at
            # module level never change.
            st.cache_data.clear()
            hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
            t_stamp = "Updated through: " + str(timestamp) + " CST"
            indv_teams = hitter_gamelog_table.drop_duplicates(subset='Team')
            total_teams = indv_teams.Team.values.tolist()
            indv_hitters = hitter_gamelog_table.drop_duplicates(subset='Player')
            total_hitters = indv_hitters.Player.values.tolist()
            indv_pitchers = pitcher_gamelog_table.drop_duplicates(subset='Player')
            total_pitchers = indv_pitchers.Player.values.tolist()
            total_dates = hitter_gamelog_table.Date.values.tolist()

        split_var1 = st.radio("What table would you like to view?", ('Season Logs', 'Game logs', 'Team Logs'), key='split_var1')

        split_var2 = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key='split_var2')
        if split_var2 == 'Specific Teams':
            team_var1 = st.multiselect('Which teams would you like to include in the tables?', options=total_teams, key='team_var1')
        else:
            team_var1 = total_teams

        split_var3 = st.radio("Would you like to view all dates or specific ones?", ('All', 'Specific Dates'), key='split_var3')
        # Default to the full range. A date picker that has not been filled in
        # yet returns None, which must not reach the comparisons below.
        low_date = hitter_gamelog_table['Date'].min()
        high_date = hitter_gamelog_table['Date'].max()
        if split_var3 == 'Specific Dates':
            picked_low = st.date_input('Min Date:', value=None, format="YYYY-MM-DD", key='low_date')
            picked_high = st.date_input('Max Date:', value=None, format="YYYY-MM-DD", key='high_date')
            if picked_low is not None:
                low_date = pd.to_datetime(picked_low).date()
            if picked_high is not None:
                high_date = pd.to_datetime(picked_high).date()

        split_var4 = st.radio("Would you like to view all players or specific ones?", ('All', 'Specific Players'), key='split_var4')
        if split_var4 == 'Specific Players':
            player_var1 = st.multiselect('Which players would you like to include in the tables?', options=total_hitters, key='player_var1')
        else:
            player_var1 = total_hitters

    with col2:
        in_dates = (hitter_gamelog_table['Date'] >= low_date) & (hitter_gamelog_table['Date'] <= high_date)
        in_teams = hitter_gamelog_table['Team'].isin(team_var1)
        in_players = hitter_gamelog_table['Player'].isin(player_var1)

        if split_var1 == 'Season Logs':
            choose_cols = st.container()
            with choose_cols:
                choose_disp = st.multiselect('Which stats would you like to view?', options=season_hitter_data_cols, default=season_hitter_data_cols, key='col_display')
            display = st.container()
            working_data = hitter_gamelog_table[in_dates & in_teams & in_players]
            season_long_table = hitter_seasonlong_build(working_data)
            season_long_table = season_long_table.set_index('Player')
            # Player is the index; show Team followed by the chosen stats.
            season_long_table_disp = season_long_table.reindex(['Team'] + choose_disp, axis="columns")
            display.dataframe(season_long_table_disp.style.format(precision=2), height=750, use_container_width=True)
            st.download_button(
                label="Export hitter seasonlogs Model",
                data=convert_df_to_csv(season_long_table),
                file_name='Seasonlogs_Hitter_View.csv',
                mime='text/csv',
            )

        elif split_var1 == 'Team Logs':
            choose_cols = st.container()
            with choose_cols:
                choose_disp = st.multiselect('Which stats would you like to view?', options=season_hitter_data_cols, default=season_hitter_data_cols, key='col_display')
            display = st.container()
            # Team totals ignore the player filter.
            working_data = hitter_gamelog_table[in_dates & in_teams]
            team_table = hitter_team_build(working_data)
            team_table = team_table.set_index('Team')
            team_table_disp = team_table.reindex(choose_disp, axis="columns")
            display.dataframe(team_table_disp.style.format(precision=2), height=750, use_container_width=True)
            st.download_button(
                label="Export hitter team logs Model",
                data=convert_df_to_csv(team_table),
                # Distinct name so a team export does not overwrite a season export.
                file_name='Teamlogs_Hitter_View.csv',
                mime='text/csv',
            )

        elif split_var1 == 'Game logs':
            choose_cols = st.container()
            with choose_cols:
                choose_disp_gamelog = st.multiselect('Which stats would you like to view?', options=hitter_data_cols, default=hitter_data_cols, key='choose_disp_gamelog')
            gamelog_disp_stats = basic_cols + choose_disp_gamelog
            working_data = hitter_gamelog_table[in_dates & in_teams & in_players].reset_index(drop=True)
            gamelog_data = working_data.reindex(gamelog_disp_stats, axis="columns")

            display = st.container()
            bottom_menu = st.columns((4, 1, 1))
            with bottom_menu[2]:
                batch_size = st.selectbox("Page Size", options=[25, 50, 100], key='hitter_pagination')
            with bottom_menu[1]:
                # Ceiling division so a final partial page stays reachable.
                total_pages = max(1, -(-len(gamelog_data) // batch_size))
                # Keep a remembered page number valid when the filters shrink
                # the result set.
                if st.session_state.get('hitter_page', 1) > total_pages:
                    st.session_state['hitter_page'] = total_pages
                # Explicit key: the pitcher tab has an otherwise identical
                # "Page" widget and both tabs are rendered in the same run.
                current_page = st.number_input("Page", min_value=1, max_value=total_pages, step=1, key='hitter_page')
            with bottom_menu[0]:
                st.markdown(f"Page **{current_page}** of **{total_pages}** ")

            if len(gamelog_data) == 0:
                # No rows (e.g. no players selected): nothing to paginate.
                display.info("No game logs match the current filters.")
            else:
                pages = split_frame(gamelog_data, batch_size)
                display.dataframe(data=pages[current_page - 1].style.format(precision=2), height=500, use_container_width=True)
                st.download_button(
                    label="Export hitter gamelogs model",
                    data=convert_df_to_csv(gamelog_data),
                    file_name='Gamelogs_Hitter_View.csv',
                    mime='text/csv',
                )
# ---------------------------------------------------------------------------
# Tab 2: pitcher season logs and game logs.
# Left column holds the filters; right column renders the selected table.
# ---------------------------------------------------------------------------
with tab2:
    st.info(t_stamp)
    col1, col2 = st.columns([1, 9])

    with col1:
        if st.button("Reset Data", key='reset2'):
            # Clear cached data and reload. Only values derived from the
            # tables are recomputed; the static column lists defined at
            # module level never change.
            st.cache_data.clear()
            hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
            t_stamp = "Updated through: " + str(timestamp) + " CST"
            indv_teams = hitter_gamelog_table.drop_duplicates(subset='Team')
            total_teams = indv_teams.Team.values.tolist()
            indv_hitters = hitter_gamelog_table.drop_duplicates(subset='Player')
            total_hitters = indv_hitters.Player.values.tolist()
            indv_pitchers = pitcher_gamelog_table.drop_duplicates(subset='Player')
            total_pitchers = indv_pitchers.Player.values.tolist()
            total_dates = hitter_gamelog_table.Date.values.tolist()

        sp_split_var1 = st.radio("What table would you like to view?", ('Season Logs', 'Gamelogs'), key='sp_split_var1')

        sp_split_var2 = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key='sp_split_var2')
        if sp_split_var2 == 'Specific Teams':
            sp_team_var1 = st.multiselect('Which teams would you like to include in the tables?', options=total_teams, key='sp_team_var1')
        else:
            sp_team_var1 = total_teams

        sp_split_var3 = st.radio("Would you like to view all dates or specific ones?", ('All', 'Specific Dates'), key='sp_split_var3')
        # Default to the full range. A date picker that has not been filled in
        # yet returns None, which must not reach the comparisons below.
        sp_low_date = pitcher_gamelog_table['Date'].min()
        sp_high_date = pitcher_gamelog_table['Date'].max()
        if sp_split_var3 == 'Specific Dates':
            sp_picked_low = st.date_input('Min Date:', value=None, format="YYYY-MM-DD", key='sp_low_date')
            sp_picked_high = st.date_input('Max Date:', value=None, format="YYYY-MM-DD", key='sp_high_date')
            if sp_picked_low is not None:
                sp_low_date = pd.to_datetime(sp_picked_low).date()
            if sp_picked_high is not None:
                sp_high_date = pd.to_datetime(sp_picked_high).date()

        sp_split_var4 = st.radio("Would you like to view all players or specific ones?", ('All', 'Specific Players'), key='sp_split_var4')
        if sp_split_var4 == 'Specific Players':
            sp_player_var1 = st.multiselect('Which players would you like to include in the tables?', options=total_pitchers, key='sp_player_var1')
        else:
            sp_player_var1 = total_pitchers

    with col2:
        sp_selected = (
            (pitcher_gamelog_table['Date'] >= sp_low_date)
            & (pitcher_gamelog_table['Date'] <= sp_high_date)
            & pitcher_gamelog_table['Team'].isin(sp_team_var1)
            & pitcher_gamelog_table['Player'].isin(sp_player_var1)
        )
        working_data = pitcher_gamelog_table[sp_selected]

        if sp_split_var1 == 'Season Logs':
            choose_cols = st.container()
            with choose_cols:
                sp_choose_disp = st.multiselect('Which stats would you like to view?', options=season_pitcher_data_cols, default=season_pitcher_data_cols, key='sp_col_display')
            display = st.container()
            season_long_table = pitcher_seasonlong_build(working_data)
            season_long_table = season_long_table.set_index('Player')
            # Player is the index; show Team followed by the chosen stats.
            season_long_table_disp = season_long_table.reindex(['Team'] + sp_choose_disp, axis="columns")
            display.dataframe(season_long_table_disp.style.format(precision=2), height=750, use_container_width=True)
            st.download_button(
                label="Export pitcher seasonlogs Model",
                data=convert_df_to_csv(season_long_table),
                file_name='Seasonlogs_Pitcher_View.csv',
                mime='text/csv',
            )

        elif sp_split_var1 == 'Gamelogs':
            choose_cols = st.container()
            with choose_cols:
                sp_choose_disp_gamelog = st.multiselect('Which stats would you like to view?', options=pitcher_data_cols, default=pitcher_data_cols, key='sp_choose_disp_gamelog')
            gamelog_disp_stats = basic_cols + sp_choose_disp_gamelog
            working_data = working_data.reset_index(drop=True)
            gamelog_data = working_data.reindex(gamelog_disp_stats, axis="columns")

            display = st.container()
            bottom_menu = st.columns((4, 1, 1))
            with bottom_menu[2]:
                batch_size = st.selectbox("Page Size", options=[25, 50, 100], key='pitcher_pagination')
            with bottom_menu[1]:
                # Ceiling division so a final partial page stays reachable.
                total_pages = max(1, -(-len(gamelog_data) // batch_size))
                # Keep a remembered page number valid when the filters shrink
                # the result set.
                if st.session_state.get('pitcher_page', 1) > total_pages:
                    st.session_state['pitcher_page'] = total_pages
                # Explicit key: the hitter tab has an otherwise identical
                # "Page" widget and both tabs are rendered in the same run.
                current_page = st.number_input("Page", min_value=1, max_value=total_pages, step=1, key='pitcher_page')
            with bottom_menu[0]:
                st.markdown(f"Page **{current_page}** of **{total_pages}** ")

            if len(gamelog_data) == 0:
                # No rows match: nothing to paginate or export.
                display.info("No game logs match the current filters.")
            else:
                pages = split_frame(gamelog_data, batch_size)
                display.dataframe(data=pages[current_page - 1].style.format(precision=2), height=500, use_container_width=True)
                st.download_button(
                    label="Export pitcher gamelogs model",
                    data=convert_df_to_csv(gamelog_data),
                    # Was 'Gamelogs_Hitter_View.csv' (copy-paste from the hitter tab).
                    file_name='Gamelogs_Pitcher_View.csv',
                    mime='text/csv',
                )
# ---------------------------------------------------------------------------
# Tab 3: line graphs of one to three stats for a single player.
# Left column holds the controls, the middle column the graph, and the right
# column the underlying data.
# ---------------------------------------------------------------------------
with tab3:
    st.info(t_stamp)
    st.info("Note when creating graphs with multiple stats: The LEFT y-axis will be locked to values of the first stat you choose, while the RIGHT y-axis will be locked to the values of the second or third stat you chose depending on wether you are viewing Two or Three stats. So, to maximize the use of the graphs, you'll want to make sure that you are using compatible stats. I.E. use percentages together like GB% and FB% or average based stats like AVG and BABIP")
    col1, col2, col3 = st.columns([1, 7, 2])

    with col1:
        if st.button("Reset Data", key='reset3'):
            # Clear cached data and reload. Only values derived from the
            # tables are recomputed; the static column lists defined at
            # module level never change.
            st.cache_data.clear()
            hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
            t_stamp = "Updated through: " + str(timestamp) + " CST"
            indv_teams = hitter_gamelog_table.drop_duplicates(subset='Team')
            total_teams = indv_teams.Team.values.tolist()
            indv_hitters = hitter_gamelog_table.drop_duplicates(subset='Player')
            total_hitters = indv_hitters.Player.values.tolist()
            indv_pitchers = pitcher_gamelog_table.drop_duplicates(subset='Player')
            total_pitchers = indv_pitchers.Player.values.tolist()
            total_dates = hitter_gamelog_table.Date.values.tolist()

        plot_type = st.radio("Are you viewing hitter or pitcher stats?", ('Pitcher', 'Hitter'), key='plot_type')
        if plot_type == "Pitcher":
            player_drop = total_pitchers
            stat_drop = pitcher_data_cols
            working_data = pitcher_gamelog_table
        else:
            player_drop = total_hitters
            stat_drop = hitter_data_cols
            working_data = hitter_gamelog_table

        player_var3 = st.selectbox("Which player are you viewing?", player_drop, key='player_var3')

        plot_count = st.radio("how many stats would you like to plot?", ('One', 'Two', 'Three'), key='plot_count')
        stat_total = {'One': 1, 'Two': 2, 'Three': 3}[plot_count]
        # One selectbox per requested stat (keys plot_var1 .. plot_var3).
        plot_stats = [
            st.selectbox("Which stat are you viewing?", stat_drop, key=f'plot_var{slot}')
            for slot in range(1, stat_total + 1)
        ]

        date_var_3 = st.radio("Would you like to view all dates or specific ones?", ('All', 'Specific Dates', '5-day Averages', '10-day Averages'), key='date_var_3')
        # Default to the full range of the table actually being plotted. A
        # date picker that has not been filled in yet returns None and must
        # not reach the comparisons below.
        plot_low_date = working_data['Date'].min()
        plot_high_date = working_data['Date'].max()
        if date_var_3 == 'Specific Dates':
            picked_plot_low = st.date_input('Min Date:', value=None, format="YYYY-MM-DD", key='plot_low_date')
            picked_plot_high = st.date_input('Max Date:', value=None, format="YYYY-MM-DD", key='plot_high_date')
            if picked_plot_low is not None:
                plot_low_date = pd.to_datetime(picked_plot_low).date()
            if picked_plot_high is not None:
                plot_high_date = pd.to_datetime(picked_plot_high).date()

    with col2:
        # Filter with this tab's own date range. The previous code reused the
        # pitcher tab's date and team selections, so the graph silently
        # depended on widgets in another tab.
        player_rows = working_data[
            (working_data['Date'] >= plot_low_date)
            & (working_data['Date'] <= plot_high_date)
            & (working_data['Player'] == player_var3)
        ].copy()  # copy so the Date conversion below does not write into a slice

        # The same stat may be chosen in more than one selectbox; keep one
        # column per stat so lookups return a single Series.
        unique_stats = list(dict.fromkeys(plot_stats))

        rolling_window = {'5-day Averages': '5D', '10-day Averages': '10D'}.get(date_var_3)
        if rolling_window is not None:
            # Time-based rolling windows need a sorted datetime index.
            player_rows['Date'] = pd.to_datetime(player_rows['Date'])
            player_rows = (
                player_rows.sort_values('Date', kind='stable')
                .set_index('Date')[unique_stats]
                .rolling(rolling_window)
                .mean()
                .reset_index()
            )

        disp_data = player_rows
        graph_data = player_rows.reindex(['Date'] + unique_stats, axis="columns")

        # With several stats, the last one gets the right-hand axis and the
        # others share the left-hand axis.
        if len(plot_stats) > 1:
            left_stats, right_stat = plot_stats[:-1], plot_stats[-1]
        else:
            left_stats, right_stat = plot_stats, None

        fig, ax1 = plt.subplots(figsize=(20, 10), layout='tight')
        try:
            line_handles = []
            ax1.set_xlabel('Date')
            ax1.set_ylabel(" / ".join(left_stats), color='tab:blue')
            for stat_name, line_color in zip(left_stats, ('tab:blue', 'tab:orange')):
                line_handles += ax1.plot(graph_data['Date'], graph_data[stat_name], color=line_color)
            ax1.tick_params(axis='y', labelcolor='tab:blue')

            if right_stat is not None:
                ax2 = ax1.twinx()
                ax2.set_ylabel(right_stat, color='tab:green')
                line_handles += ax2.plot(graph_data['Date'], graph_data[right_stat], color='tab:green')
                ax2.tick_params(axis='y', labelcolor='tab:green')
                fig.legend(line_handles, plot_stats, loc="upper right")

            buf = BytesIO()
            fig.savefig(buf, format="png")
        finally:
            # pyplot keeps every figure alive until closed; without this each
            # rerun of the script leaks one figure.
            plt.close(fig)
        st.image(buf)

    with col3:
        st.table(disp_data)