MLB_Gamelogs / app.py
Multichem's picture
Update app.py
02c26df verified
raw
history blame
36.5 kB
import streamlit as st

# set_page_config has to be the first Streamlit command issued by the script,
# so it runs before anything else touches the `st` API.
st.set_page_config(layout="wide")

# NOTE: the original script deleted every public global here and then
# re-imported streamlit. At this point the only public global is `st`, so the
# loop merely forced a second import; it has been removed.

import gc

import certifi
import gspread
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import pymongo

# Path to certifi's CA bundle (kept at module level for TLS configuration).
ca = certifi.where()
@st.cache_resource
def init_conn():
    """Create the Google Sheets client and the MongoDB client/database handles.

    Credentials are read from the environment instead of being hard-coded in
    the source file:

    * ``GCP_SERVICE_ACCOUNT_JSON`` - the full service-account key file as a
      JSON string.
    * ``MONGO_URI`` - the complete MongoDB connection string.

    SECURITY: a private key and a database password used to be committed in
    this function. Anything that was ever committed must be treated as
    compromised and rotated.

    Returns:
        tuple: ``(gspread client, pymongo.MongoClient, "testing_db" database)``.

    Raises:
        RuntimeError: if a required environment variable is missing or empty.
    """
    # Local imports keep this block self-contained without touching the
    # file-level import section.
    import json
    import os

    def _require_env(var_name):
        """Return the value of ``var_name`` or fail with a clear message."""
        value = os.environ.get(var_name)
        if not value:
            raise RuntimeError(f"Missing required environment variable: {var_name}")
        return value

    scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
    credentials = json.loads(_require_env("GCP_SERVICE_ACCOUNT_JSON"))

    client = pymongo.MongoClient(_require_env("MONGO_URI"))
    db = client["testing_db"]

    gc_con = gspread.service_account_from_dict(credentials, scope)

    return gc_con, client, db
# Open the shared service handles once per process (init_conn is cached).
gcservice_account, client, db = init_conn()

# Two-decimal percentage format strings keyed by column label.
# NOTE(review): nothing else in this file appears to reference this mapping.
percentages_format = dict.fromkeys(('PG', 'SG', 'SF', 'PF', 'C'), '{:.2%}')
def _load_gamelog_frame(collection_name, source_columns, blank_to_nan=False):
    """Read one gamelog collection from ``db`` into a cleaned DataFrame.

    Args:
        collection_name: name of the MongoDB collection to read in full.
        source_columns: columns to keep, in display order. Must include
            'NameASCII', 'Team' and 'Date'.
        blank_to_nan: when True, empty strings are replaced with NaN before
            any conversion.

    Returns:
        DataFrame with 'Date' as ``datetime.date`` values, every stat column
        coerced to numeric (unparseable values become NaN), 'NameASCII'
        renamed to 'Player' and any ' (sc)' suffix stripped from column names.
    """
    records = list(db[collection_name].find())  # every document in the collection
    # .copy() gives an independent frame, so the assignments below do not
    # write into a slice of the raw frame (chained-assignment warnings).
    frame = pd.DataFrame(records)[source_columns].copy()
    if blank_to_nan:
        frame = frame.replace("", np.nan)

    frame['Date'] = pd.to_datetime(frame['Date']).dt.date

    stat_columns = frame.columns.drop(['NameASCII', 'Team', 'Date'])
    frame[stat_columns] = frame[stat_columns].apply(pd.to_numeric, errors='coerce')

    renamed = {col: col.replace(' (sc)', '') for col in source_columns if col.endswith(' (sc)')}
    renamed['NameASCII'] = 'Player'
    return frame.rename(columns=renamed)


# cache_data (not cache_resource): the result is plain data, and the
# "Reset Data" buttons call st.cache_data.clear(), which only affects
# functions cached with st.cache_data.
@st.cache_data(ttl=600)
def init_baselines():
    """Load the hitter and pitcher game logs from MongoDB.

    The result is cached for 600 seconds.

    Returns:
        tuple: ``(hitter_gamelog_table, pitcher_gamelog_table, timestamp)``
        where ``timestamp`` is the largest 'Date' in the pitcher table.
    """
    hitter_source_cols = ['NameASCII', 'Team', 'Date', 'G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                          'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%']
    pitcher_source_cols = ['NameASCII', 'Team', 'Date', 'G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                           'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%',
                           'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA', 'vFA (sc)', 'vFT (sc)', 'vFC (sc)', 'vFS (sc)', 'vFO (sc)', 'vSI (sc)',
                           'vSL (sc)', 'vCU (sc)', 'vKC (sc)', 'vEP (sc)', 'vCH (sc)', 'vSC (sc)', 'vKN (sc)']

    hitter_gamelog_table = _load_gamelog_frame("MLB_Hitters_DB", hitter_source_cols)
    # Only the pitcher feed had its blank strings converted to NaN originally.
    pitcher_gamelog_table = _load_gamelog_frame("MLB_Pitchers_DB", pitcher_source_cols, blank_to_nan=True)

    timestamp = pitcher_gamelog_table['Date'].max()

    return hitter_gamelog_table, pitcher_gamelog_table, timestamp
@st.cache_data(show_spinner=False)
def hitter_seasonlong_build(data_sample):
    """Aggregate hitter game logs into one summary row per player.

    Within each (Player, Team) group the counting stats are summed and cast to
    int, and the rate stats are averaged (plain mean of the per-row values)
    and exposed under an 'Avg ' prefix. Everything is computed in a single
    grouped pass rather than one groupby per column, and no column is assigned
    into a slice of the input.

    Args:
        data_sample: hitter game-log frame containing 'Player', 'Team' and the
            stat columns listed in ``stat_order``.

    Returns:
        DataFrame with columns 'Player', 'Team' and the aggregated stats,
        at most one row per Player (the first Player/Team pair encountered is
        kept), sorted by 'Avg wRC+' descending. The index is positional, not
        the input's row labels.
    """
    stat_order = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                  'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%']
    averaged = {'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrel%'}
    summed_cols = [stat for stat in stat_order if stat not in averaged]
    averaged_cols = [stat for stat in stat_order if stat in averaged]

    # sort=False keeps groups in order of first appearance, like the input.
    grouped = data_sample.groupby(['Player', 'Team'], sort=False)
    totals = grouped[summed_cols].sum().astype(int)
    means = grouped[averaged_cols].mean().astype(float).add_prefix('Avg ')
    season_long_table = totals.join(means).reset_index()

    ordered = ['Player', 'Team'] + [f'Avg {stat}' if stat in averaged else stat for stat in stat_order]
    season_long_table = season_long_table[ordered]

    # Keep a single row per player; callers use 'Player' as a unique index.
    season_long_table = season_long_table.drop_duplicates(subset='Player')
    return season_long_table.sort_values(by='Avg wRC+', ascending=False)
@st.cache_data(show_spinner=False)
def pitcher_seasonlong_build(data_sample):
    """Aggregate pitcher game logs into one summary row per player.

    Within each (Player, Team) group the counting stats are summed and cast to
    int, and the rate/velocity stats are averaged (plain mean of the per-row
    values) and exposed under an 'Avg ' prefix. Everything is computed in a
    single grouped pass rather than one groupby per column.

    'Avg LOB%' is kept as a float like every other average; it was previously
    cast to int, which truncated the mean and raised on NaN.

    Args:
        data_sample: pitcher game-log frame containing 'Player', 'Team' and
            the stat columns listed in ``stat_order``.

    Returns:
        DataFrame with columns 'Player', 'Team' and the aggregated stats,
        at most one row per Player (the first Player/Team pair encountered is
        kept), sorted by 'SO' descending. The index is positional, not the
        input's row labels.
    """
    stat_order = ['G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                  'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%',
                  'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA', 'vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI',
                  'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN']
    averaged = {'ERA', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%',
                'FB%', 'HR/FB', 'Hard%', 'Barrel%', 'xERA', 'vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI',
                'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN'}
    summed_cols = [stat for stat in stat_order if stat not in averaged]
    averaged_cols = [stat for stat in stat_order if stat in averaged]

    # sort=False keeps groups in order of first appearance, like the input.
    grouped = data_sample.groupby(['Player', 'Team'], sort=False)
    # NOTE(review): 'IP' is summed and truncated to int exactly as before; if
    # the feed uses baseball notation (x.1 / x.2 for thirds) this is lossy.
    totals = grouped[summed_cols].sum().astype(int)
    means = grouped[averaged_cols].mean().astype(float).add_prefix('Avg ')
    season_long_table = totals.join(means).reset_index()

    ordered = ['Player', 'Team'] + [f'Avg {stat}' if stat in averaged else stat for stat in stat_order]
    season_long_table = season_long_table[ordered]

    # Keep a single row per player; callers use 'Player' as a unique index.
    season_long_table = season_long_table.drop_duplicates(subset='Player')
    return season_long_table.sort_values(by='SO', ascending=False)
@st.cache_data(show_spinner=False)
def split_frame(input_df, rows):
    """Split ``input_df`` into consecutive pages of at most ``rows`` rows.

    Slicing is positional (``iloc``), so the result is correct for any index,
    not only a freshly reset 0..n-1 index.

    Args:
        input_df: frame to paginate.
        rows: maximum number of rows per page (positive int).

    Returns:
        list of DataFrames in original row order. An empty input yields a
        single empty page so callers can always show "page 1".
    """
    if len(input_df) == 0:
        return [input_df]
    return [input_df.iloc[start:start + rows] for start in range(0, len(input_df), rows)]
def convert_df_to_csv(df):
    """Serialise ``df`` to CSV text (index included) and return it as UTF-8 bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
# Load the (cached) game logs plus the most recent pitcher game date.
hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
t_stamp = f"Updated through: {timestamp} CST"

# Identifier columns placed ahead of the user-selected stats. Game logs carry
# 'Date' (matching the definitions used in the Reset branches of both tabs);
# season tables are aggregated across dates and have no such column.
basic_cols = ['Player', 'Team', 'Date']
basic_season_cols = ['Player', 'Team']

# Selectable stat columns for each view.
hitter_data_cols = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                    'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%']
season_hitter_data_cols = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                           'GDP', 'SB', 'CS', 'Avg AVG', 'Avg SLG', 'Avg wRC+', 'Avg LD%', 'Avg GB%', 'Avg FB%', 'Avg Hard%', 'Barrels', 'Avg Barrel%']
pitcher_data_cols = ['G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                     'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%',
                     'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA', 'vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI',
                     'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN']
season_pitcher_data_cols = ['G', 'GS', 'CG', 'W', 'L', 'Avg ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                            'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'Avg K/9', 'Avg BB/9', 'Avg WHIP', 'Avg BABIP', 'Avg LOB%', 'Avg FIP', 'Avg xFIP', 'Avg K%',
                            'Avg BB%', 'Avg SIERA', 'Avg LD%', 'Avg GB%', 'Avg FB%', 'Avg HR/FB', 'Avg Hard%', 'Barrels', 'Avg Barrel%', 'Avg xERA', 'Avg vFA',
                            'Avg vFT', 'Avg vFC', 'Avg vFS', 'Avg vFO', 'Avg vSI', 'Avg vSL', 'Avg vCU', 'Avg vKC', 'Avg vEP', 'Avg vCH', 'Avg vSC', 'Avg vKN']

# Option lists for the filter widgets, in order of first appearance.
indv_teams = hitter_gamelog_table.drop_duplicates(subset='Team')
total_teams = indv_teams['Team'].tolist()
indv_hitters = hitter_gamelog_table.drop_duplicates(subset='Player')
total_hitters = indv_hitters['Player'].tolist()
indv_pitchers = pitcher_gamelog_table.drop_duplicates(subset='Player')
total_pitchers = indv_pitchers['Player'].tolist()
total_dates = hitter_gamelog_table['Date'].tolist()

tab1, tab2 = st.tabs(['Hitter Gamelogs', 'Pitcher Gamelogs'])
# ---- Hitter tab: filter controls on the left, table and export on the right.
with tab1:
    st.info(t_stamp)
    col1, col2 = st.columns([1, 9])
    with col1:
        if st.button("Reset Data", key='reset1'):
            # Clear the data caches and init_baselines' own cache so the next
            # call really reloads, whichever cache decorator it uses.
            st.cache_data.clear()
            init_baselines.clear()
            hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
            t_stamp = f"Updated through: {timestamp} CST"
            # Rebuild the widget option lists from the fresh tables. The
            # column-name lists are constants and need no redefinition.
            total_teams = hitter_gamelog_table['Team'].drop_duplicates().tolist()
            total_hitters = hitter_gamelog_table['Player'].drop_duplicates().tolist()
            total_pitchers = pitcher_gamelog_table['Player'].drop_duplicates().tolist()

        split_var1 = st.radio("What table would you like to view?", ('Gamelogs', 'Season Logs'), key='split_var1')

        split_var2 = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key='split_var2')
        if split_var2 == 'Specific Teams':
            team_var1 = st.multiselect('Which teams would you like to include in the tables?', options=total_teams, key='team_var1')
        else:
            team_var1 = total_teams

        split_var3 = st.radio("Would you like to view all dates or specific ones?", ('All', 'Specific Dates'), key='split_var3')
        # Start from the full range; a date picker that is still empty (None)
        # leaves its bound open instead of breaking the comparison below.
        low_date = hitter_gamelog_table['Date'].min()
        high_date = hitter_gamelog_table['Date'].max()
        if split_var3 == 'Specific Dates':
            picked_low = st.date_input('Min Date:', value=None, format="YYYY-MM-DD", key='low_date')
            if picked_low is not None:
                low_date = pd.to_datetime(picked_low).date()
            picked_high = st.date_input('Max Date:', value=None, format="YYYY-MM-DD", key='high_date')
            if picked_high is not None:
                high_date = pd.to_datetime(picked_high).date()

        split_var4 = st.radio("Would you like to view all players or specific ones?", ('All', 'Specific Players'), key='split_var4')
        if split_var4 == 'Specific Players':
            player_var1 = st.multiselect('Which players would you like to include in the tables?', options=total_hitters, key='player_var1')
        else:
            player_var1 = total_hitters

    with col2:
        # Both views start from the same filtered rows.
        keep_rows = (
            (hitter_gamelog_table['Date'] >= low_date)
            & (hitter_gamelog_table['Date'] <= high_date)
            & hitter_gamelog_table['Team'].isin(team_var1)
            & hitter_gamelog_table['Player'].isin(player_var1)
        )
        working_data = hitter_gamelog_table[keep_rows]

        if split_var1 == 'Season Logs':
            choose_cols = st.container()
            with choose_cols:
                choose_disp = st.multiselect('Which stats would you like to view?', options=season_hitter_data_cols, default=season_hitter_data_cols, key='col_display')
            display = st.container()

            season_long_table = hitter_seasonlong_build(working_data).set_index('Player')
            # 'Player' is now the index; requesting it (or any other label the
            # season table lacks) as a column would add an all-NaN column.
            disp_stats = [col for col in basic_season_cols + choose_disp if col in season_long_table.columns]
            season_long_table_disp = season_long_table.reindex(disp_stats, axis="columns")
            display.dataframe(season_long_table_disp.style.format(precision=2), height=750, use_container_width=True)
            st.download_button(
                label="Export hitter seasonlogs Model",
                data=convert_df_to_csv(season_long_table),
                file_name='Seasonlogs_Hitter_View.csv',
                mime='text/csv',
            )

        elif split_var1 == 'Gamelogs':
            choose_cols = st.container()
            with choose_cols:
                choose_disp_gamelog = st.multiselect('Which stats would you like to view?', options=hitter_data_cols, default=hitter_data_cols, key='choose_disp_gamelog')
            gamelog_disp_stats = basic_cols + choose_disp_gamelog

            gamelog_data = working_data.reset_index(drop=True).reindex(gamelog_disp_stats, axis="columns")
            display = st.container()

            bottom_menu = st.columns((4, 1, 1))
            with bottom_menu[2]:
                # Explicit keys keep these widgets distinct from the otherwise
                # identical pagination controls on the pitcher tab.
                batch_size = st.selectbox("Page Size", options=[25, 50, 100], key='hitter_page_size')
            with bottom_menu[1]:
                # Ceiling division so a final partial page is reachable;
                # always offer at least one page.
                total_pages = max(1, -(-len(gamelog_data) // batch_size))
                current_page = st.number_input(
                    "Page", min_value=1, max_value=total_pages, step=1, key='hitter_page_number'
                )
            with bottom_menu[0]:
                st.markdown(f"Page **{current_page}** of **{total_pages}** ")

            pages = split_frame(gamelog_data, batch_size)
            # No rows after filtering: show the empty frame rather than index
            # into an empty page list.
            page_frame = pages[current_page - 1] if pages else gamelog_data
            display.dataframe(data=page_frame.style.format(precision=2), height=500, use_container_width=True)
            st.download_button(
                label="Export hitter gamelogs model",
                data=convert_df_to_csv(gamelog_data),
                file_name='Gamelogs_Hitter_View.csv',
                mime='text/csv',
            )
# ---- Pitcher tab: filter controls on the left, table and export on the right.
with tab2:
    st.info(t_stamp)
    col1, col2 = st.columns([1, 9])
    with col1:
        if st.button("Reset Data", key='reset2'):
            # Clear the data caches and init_baselines' own cache so the next
            # call really reloads, whichever cache decorator it uses.
            st.cache_data.clear()
            init_baselines.clear()
            hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
            t_stamp = f"Updated through: {timestamp} CST"
            # Rebuild the widget option lists from the fresh tables. The
            # column-name lists are constants and need no redefinition.
            total_teams = hitter_gamelog_table['Team'].drop_duplicates().tolist()
            total_hitters = hitter_gamelog_table['Player'].drop_duplicates().tolist()
            total_pitchers = pitcher_gamelog_table['Player'].drop_duplicates().tolist()

        sp_split_var1 = st.radio("What table would you like to view?", ('Gamelogs', 'Season Logs'), key='sp_split_var1')

        sp_split_var2 = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key='sp_split_var2')
        if sp_split_var2 == 'Specific Teams':
            sp_team_var1 = st.multiselect('Which teams would you like to include in the tables?', options=total_teams, key='sp_team_var1')
        else:
            sp_team_var1 = total_teams

        sp_split_var3 = st.radio("Would you like to view all dates or specific ones?", ('All', 'Specific Dates'), key='sp_split_var3')
        # Start from the full range; a date picker that is still empty (None)
        # leaves its bound open instead of breaking the comparison below.
        sp_low_date = pitcher_gamelog_table['Date'].min()
        sp_high_date = pitcher_gamelog_table['Date'].max()
        if sp_split_var3 == 'Specific Dates':
            sp_picked_low = st.date_input('Min Date:', value=None, format="YYYY-MM-DD", key='sp_low_date')
            if sp_picked_low is not None:
                sp_low_date = pd.to_datetime(sp_picked_low).date()
            sp_picked_high = st.date_input('Max Date:', value=None, format="YYYY-MM-DD", key='sp_high_date')
            if sp_picked_high is not None:
                sp_high_date = pd.to_datetime(sp_picked_high).date()

        sp_split_var4 = st.radio("Would you like to view all players or specific ones?", ('All', 'Specific Players'), key='sp_split_var4')
        if sp_split_var4 == 'Specific Players':
            sp_player_var1 = st.multiselect('Which players would you like to include in the tables?', options=total_pitchers, key='sp_player_var1')
        else:
            sp_player_var1 = total_pitchers

    with col2:
        # Both views start from the same filtered rows.
        keep_rows = (
            (pitcher_gamelog_table['Date'] >= sp_low_date)
            & (pitcher_gamelog_table['Date'] <= sp_high_date)
            & pitcher_gamelog_table['Team'].isin(sp_team_var1)
            & pitcher_gamelog_table['Player'].isin(sp_player_var1)
        )
        working_data = pitcher_gamelog_table[keep_rows]

        if sp_split_var1 == 'Season Logs':
            choose_cols = st.container()
            with choose_cols:
                sp_choose_disp = st.multiselect('Which stats would you like to view?', options=season_pitcher_data_cols, default=season_pitcher_data_cols, key='sp_col_display')
            display = st.container()

            season_long_table = pitcher_seasonlong_build(working_data).set_index('Player')
            # 'Player' is now the index; requesting it (or any other label the
            # season table lacks) as a column would add an all-NaN column.
            disp_stats = [col for col in basic_season_cols + sp_choose_disp if col in season_long_table.columns]
            season_long_table_disp = season_long_table.reindex(disp_stats, axis="columns")
            display.dataframe(season_long_table_disp.style.format(precision=2), height=750, use_container_width=True)
            st.download_button(
                label="Export pitcher seasonlogs Model",
                data=convert_df_to_csv(season_long_table),
                file_name='Seasonlogs_Pitcher_View.csv',
                mime='text/csv',
            )

        elif sp_split_var1 == 'Gamelogs':
            choose_cols = st.container()
            with choose_cols:
                sp_choose_disp_gamelog = st.multiselect('Which stats would you like to view?', options=pitcher_data_cols, default=pitcher_data_cols, key='sp_choose_disp_gamelog')
            gamelog_disp_stats = basic_cols + sp_choose_disp_gamelog

            gamelog_data = working_data.reset_index(drop=True).reindex(gamelog_disp_stats, axis="columns")
            display = st.container()

            bottom_menu = st.columns((4, 1, 1))
            with bottom_menu[2]:
                # Explicit keys keep these widgets distinct from the otherwise
                # identical pagination controls on the hitter tab.
                batch_size = st.selectbox("Page Size", options=[25, 50, 100], key='sp_page_size')
            with bottom_menu[1]:
                # Ceiling division so a final partial page is reachable;
                # always offer at least one page.
                total_pages = max(1, -(-len(gamelog_data) // batch_size))
                current_page = st.number_input(
                    "Page", min_value=1, max_value=total_pages, step=1, key='sp_page_number'
                )
            with bottom_menu[0]:
                st.markdown(f"Page **{current_page}** of **{total_pages}** ")

            pages = split_frame(gamelog_data, batch_size)
            # No rows after filtering: show the empty frame rather than index
            # into an empty page list.
            page_frame = pages[current_page - 1] if pages else gamelog_data
            display.dataframe(data=page_frame.style.format(precision=2), height=500, use_container_width=True)
            st.download_button(
                label="Export pitcher gamelogs model",
                data=convert_df_to_csv(gamelog_data),
                # Was 'Gamelogs_Hitter_View.csv', copied from the hitter tab.
                file_name='Gamelogs_Pitcher_View.csv',
                mime='text/csv',
            )