import time
from io import BytesIO

import certifi
import gspread
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymongo
import streamlit as st
from pymongo.mongo_client import MongoClient

# set_page_config has to be the first Streamlit command issued by the script.
st.set_page_config(layout="wide")

ca = certifi.where()

# Columns read from the MongoDB game-log collections, in display order.
_HITTER_SOURCE_COLS = ['NameASCII', 'Team', 'Date', 'G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI',
                       'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH', 'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%',
                       'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%']
_PITCHER_SOURCE_COLS = ['NameASCII', 'Team', 'Date', 'G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS',
                        'IP', 'TBF', 'H', 'R', 'ER', 'HR', 'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9',
                        'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%', 'FB%',
                        'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA', 'vFA (sc)', 'vFT (sc)', 'vFC (sc)',
                        'vFS (sc)', 'vFO (sc)', 'vSI (sc)', 'vSL (sc)', 'vCU (sc)', 'vKC (sc)', 'vEP (sc)',
                        'vCH (sc)', 'vSC (sc)', 'vKN (sc)']


@st.cache_resource
def init_conn():
    """Open the Google Sheets and MongoDB connections used by the app.

    Credentials are read from Streamlit secrets instead of being embedded in
    the source file:

    * ``st.secrets["gcp_service_account"]`` - the service-account JSON fields
      (type, project_id, private_key, client_email, ...).
    * ``st.secrets["mongo"]["uri"]`` - the full ``mongodb+srv://`` URI.

    NOTE(review): the key and password that used to be committed here must be
    treated as compromised and rotated.

    Returns:
        tuple: ``(gspread client, pymongo.MongoClient, "testing_db" database)``.

    Raises:
        KeyError: if either secret is missing from the Streamlit secrets store.
    """
    scope = ['https://spreadsheets.google.com/feeds',
             'https://www.googleapis.com/auth/drive']

    # gspread expects a plain dict, not Streamlit's secrets mapping.
    credentials = dict(st.secrets["gcp_service_account"])
    uri = st.secrets["mongo"]["uri"]

    client = pymongo.MongoClient(uri, retryWrites=True, serverSelectionTimeoutMS=100000)
    db = client["testing_db"]

    gc_con = gspread.service_account_from_dict(credentials, scope)

    return gc_con, client, db


gcservice_account, client, db = init_conn()

percentages_format = {'PG': '{:.2%}', 'SG': '{:.2%}', 'SF': '{:.2%}', 'PF': '{:.2%}', 'C': '{:.2%}'}


def _display_name(source_col):
    """Map a stored column name to the name shown in the app."""
    if source_col == 'NameASCII':
        return 'Player'
    return source_col.replace(' (sc)', '')


def _load_gamelog(collection_name, source_cols, blank_to_nan=False):
    """Read one game-log collection into a cleaned DataFrame.

    Args:
        collection_name: name of the collection inside ``db``.
        source_cols: stored column names to keep, in output order.
        blank_to_nan: replace empty strings with NaN before conversion.

    Returns:
        DataFrame with ``Date`` as ``datetime.date`` values, every column other
        than the name, team and date coerced to numeric (unparseable values
        become NaN), and columns renamed via ``_display_name``.
    """
    raw = pd.DataFrame(list(db[collection_name].find()))

    # Work on an explicit copy so the assignments below never hit a view of raw.
    table = raw[source_cols].copy()
    if blank_to_nan:
        table = table.replace("", np.nan)

    table['Date'] = pd.to_datetime(table['Date']).dt.date

    stat_cols = table.columns.drop(['NameASCII', 'Team', 'Date'])
    table[stat_cols] = table[stat_cols].apply(pd.to_numeric, errors='coerce')

    return table.rename(columns=_display_name)


# cache_data (not cache_resource): the results are plain data, and this is the
# cache that st.cache_data.clear() empties when the user asks for a reload.
@st.cache_data(ttl=599)
def init_baselines():
    """Load the hitter and pitcher game logs from MongoDB.

    Returns:
        tuple: ``(hitter_gamelog_table, pitcher_gamelog_table, timestamp)``
        where ``timestamp`` is the latest ``Date`` in the pitcher table.
    """
    hitter_gamelog_table = _load_gamelog("MLB_Hitters_DB", _HITTER_SOURCE_COLS)
    pitcher_gamelog_table = _load_gamelog("MLB_Pitchers_DB", _PITCHER_SOURCE_COLS, blank_to_nan=True)

    timestamp = pitcher_gamelog_table['Date'].max()

    return hitter_gamelog_table, pitcher_gamelog_table, timestamp

# Per-game stats in display order. Stats named in the matching
# *_COUNTING_STATS set are summed across games; every other stat is averaged
# and published under an "Avg " prefix.
_HITTER_STATS = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                 'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%']
_HITTER_COUNTING_STATS = frozenset(['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO',
                                    'HBP', 'SF', 'SH', 'GDP', 'SB', 'CS', 'Barrels'])
_PITCHER_STATS = ['G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                  'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP',
                  'K%', 'BB%', 'SIERA', 'LD%', 'GB%', 'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA',
                  'vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI', 'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN']
_PITCHER_COUNTING_STATS = frozenset(['G', 'GS', 'CG', 'W', 'L', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R',
                                     'ER', 'HR', 'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'Barrels'])


def _aggregate_gamelog(data_sample, group_keys, stat_cols, counting_stats, sort_by):
    """Collapse per-game rows into one row per ``group_keys[0]`` value.

    Args:
        data_sample: game-log rows containing ``group_keys`` and ``stat_cols``.
        group_keys: list of columns to group on; they lead the output columns.
        stat_cols: stats to aggregate, in output order.
        counting_stats: subset of ``stat_cols`` that is summed and cast to int;
            the remaining stats are averaged, cast to float and renamed
            ``"Avg <stat>"``.
        sort_by: output column used for the descending sort.

    Returns:
        A new DataFrame (the input is not modified) sorted by ``sort_by``.
    """
    grouped = data_sample.groupby(group_keys, sort=False)

    aggregated = {}
    for stat in stat_cols:
        if stat in counting_stats:
            aggregated[stat] = grouped[stat].transform('sum').astype(int)
        else:
            aggregated[f'Avg {stat}'] = grouped[stat].transform('mean').astype(float)

    # assign() returns a fresh frame, so nothing is written into a slice of
    # data_sample.
    table = data_sample[group_keys].assign(**aggregated)

    # Keep only the first row per leading key. For the player tables this means
    # one row per Player even though totals are grouped by (Player, Team).
    table = table.drop_duplicates(subset=group_keys[0])

    return table.sort_values(by=sort_by, ascending=False)


@st.cache_data(show_spinner=False)
def hitter_seasonlong_build(data_sample):
    """Build the per-player hitter table, sorted by ``Avg wRC+`` descending.

    Args:
        data_sample: hitter game-log rows with ``Player``, ``Team`` and the
            hitter stat columns.

    Returns:
        DataFrame with ``Player``, ``Team``, summed counting stats and
        ``Avg``-prefixed rate stats.
    """
    return _aggregate_gamelog(data_sample, ['Player', 'Team'], _HITTER_STATS,
                              _HITTER_COUNTING_STATS, 'Avg wRC+')


@st.cache_data(show_spinner=False)
def hitter_team_build(data_sample):
    """Build the per-team hitter table, sorted by ``Avg wRC+`` descending.

    Args:
        data_sample: hitter game-log rows with ``Team`` and the hitter stat
            columns.

    Returns:
        DataFrame with ``Team``, summed counting stats and ``Avg``-prefixed
        rate stats.
    """
    return _aggregate_gamelog(data_sample, ['Team'], _HITTER_STATS,
                              _HITTER_COUNTING_STATS, 'Avg wRC+')


@st.cache_data(show_spinner=False)
def pitcher_seasonlong_build(data_sample):
    """Build the per-player pitcher table, sorted by ``SO`` descending.

    Args:
        data_sample: pitcher game-log rows with ``Player``, ``Team`` and the
            pitcher stat columns.

    Returns:
        DataFrame with ``Player``, ``Team``, summed counting stats and
        ``Avg``-prefixed rate stats (all averages are floats).
    """
    return _aggregate_gamelog(data_sample, ['Player', 'Team'], _PITCHER_STATS,
                              _PITCHER_COUNTING_STATS, 'SO')


@st.cache_data(show_spinner=False)
def pitcher_team_build(data_sample):
    """Build the per-team pitcher table, sorted by ``SO`` descending.

    Args:
        data_sample: pitcher game-log rows with ``Team`` and the pitcher stat
            columns.

    Returns:
        DataFrame with ``Team``, summed counting stats and ``Avg``-prefixed
        rate stats (all averages are floats).
    """
    return _aggregate_gamelog(data_sample, ['Team'], _PITCHER_STATS,
                              _PITCHER_COUNTING_STATS, 'SO')


@st.cache_data(show_spinner=False)
def split_frame(input_df, rows):
    """Split ``input_df`` into consecutive chunks of at most ``rows`` rows.

    Chunks are taken by position, so the result does not depend on the
    frame's index labels.

    Args:
        input_df: DataFrame to page through.
        rows: chunk size; must be non-zero (``range`` raises ValueError on 0).

    Returns:
        list of DataFrames, empty when ``input_df`` has no rows.
    """
    return [input_df.iloc[start:start + rows] for start in range(0, len(input_df), rows)]


def convert_df_to_csv(df):
    """Return ``df`` serialised as UTF-8 encoded CSV bytes (index included)."""
    return df.to_csv().encode('utf-8')


# ---------------------------------------------------------------------------
# Page script: load the data and derive the option lists used by the widgets.
# ---------------------------------------------------------------------------
hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
t_stamp = f"Updated through: " + str(timestamp) + f" CST"

basic_cols = ['Player', 'Team', 'Date']
basic_season_cols = ['Player', 'Team', 'Date']
hitter_data_cols = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH', 'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%',
                    'Hard%', 'Barrels', 'Barrel%']
season_hitter_data_cols = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH', 'GDP', 'SB', 'CS', 'Avg AVG', 'Avg SLG', 'Avg wRC+',
                           'Avg LD%', 'Avg GB%', 'Avg FB%', 'Avg Hard%', 'Barrels', 'Avg Barrel%']
pitcher_data_cols = ['G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR', 'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP',
                     'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%', 'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA', 'vFA', 'vFT', 'vFC', 'vFS',
                     'vFO', 'vSI', 'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN']
season_pitcher_data_cols = ['G', 'GS', 'CG', 'W', 'L', 'Avg ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR', 'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'Avg K/9',
                            'Avg BB/9', 'Avg WHIP', 'Avg BABIP', 'Avg LOB%', 'Avg FIP', 'Avg xFIP', 'Avg K%', 'Avg BB%', 'Avg SIERA', 'Avg LD%', 'Avg GB%', 'Avg FB%',
                            'Avg HR/FB', 'Avg Hard%', 'Barrels', 'Avg Barrel%', 'Avg xERA', 'Avg vFA', 'Avg vFT', 'Avg vFC', 'Avg vFS', 'Avg vFO', 'Avg vSI', 'Avg vSL',
                            'Avg vCU', 'Avg vKC', 'Avg vEP', 'Avg vCH', 'Avg vSC', 'Avg vKN']

# Distinct teams / players, in first-appearance order, for the multiselects.
indv_teams = hitter_gamelog_table.drop_duplicates(subset='Team')
total_teams = indv_teams.Team.values.tolist()
indv_hitters = hitter_gamelog_table.drop_duplicates(subset='Player')
total_hitters = indv_hitters.Player.values.tolist()
indv_pitchers = pitcher_gamelog_table.drop_duplicates(subset='Player')
total_pitchers = indv_pitchers.Player.values.tolist()
total_dates = hitter_gamelog_table.Date.values.tolist()

tab1, tab2, tab3 = st.tabs(['Hitter Gamelogs', 'Pitcher Gamelogs', 'Sample Graphs'])

# NOTE(review): the nesting below is reconstructed from a whitespace-collapsed
# source; confirm which statements belong to the button branch.
with tab1:
    st.info(t_stamp)
    col1, col2 = st.columns([1, 9])
    with col1:
        if st.button("Reset Data", key='reset1'):
            # NOTE(review): this only empties st.cache_data caches; a loader
            # registered with st.cache_resource is not refreshed by it.
            st.cache_data.clear()
            hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
            t_stamp = f"Updated through: " + str(timestamp) + f" CST"
            basic_cols = ['Player', 'Team', 'Date']
# NOTE(review): this section was reconstructed from whitespace-collapsed source.
# It starts part-way through the hitter tab: the `with tab1:` / `with col1:`
# headers and the `basic_cols` list are defined above this point, so the hitter
# controls and tables below are rendered by re-entering the existing `col1` and
# `col2` containers.
basic_season_cols = ['Player', 'Team', 'Date']
hitter_data_cols = [
    'G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP',
    'SF', 'SH', 'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%',
    'Barrels', 'Barrel%',
]
season_hitter_data_cols = [
    'G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP',
    'SF', 'SH', 'GDP', 'SB', 'CS', 'Avg AVG', 'Avg SLG', 'Avg wRC+', 'Avg LD%', 'Avg GB%',
    'Avg FB%', 'Avg Hard%', 'Barrels', 'Avg Barrel%',
]
pitcher_data_cols = [
    'G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R',
    'ER', 'HR', 'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP',
    'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%', 'FB%', 'HR/FB', 'Hard%',
    'Barrels', 'Barrel%', 'xERA', 'vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI', 'vSL', 'vCU',
    'vKC', 'vEP', 'vCH', 'vSC', 'vKN',
]
season_pitcher_data_cols = [
    'G', 'GS', 'CG', 'W', 'L', 'Avg ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R',
    'ER', 'HR', 'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'Avg K/9', 'Avg BB/9', 'Avg WHIP',
    'Avg BABIP', 'Avg LOB%', 'Avg FIP', 'Avg xFIP', 'Avg K%', 'Avg BB%', 'Avg SIERA',
    'Avg LD%', 'Avg GB%', 'Avg FB%', 'Avg HR/FB', 'Avg Hard%', 'Barrels', 'Avg Barrel%',
    'Avg xERA', 'Avg vFA', 'Avg vFT', 'Avg vFC', 'Avg vFS', 'Avg vFO', 'Avg vSI', 'Avg vSL',
    'Avg vCU', 'Avg vKC', 'Avg vEP', 'Avg vCH', 'Avg vSC', 'Avg vKN',
]


def _unique_values(table, column):
    """Return the distinct values of ``table[column]`` in first-seen order."""
    return table[column].drop_duplicates().tolist()


def _pick_date_range(table, mode, key_prefix):
    """Return the ``(low, high)`` bounds used to filter ``table['Date']``.

    The bounds default to the minimum and maximum of ``table['Date']``. When
    ``mode`` is ``'Specific Dates'`` two date pickers are rendered (widget keys
    ``<key_prefix>low_date`` and ``<key_prefix>high_date``) and each picked
    value replaces the matching default.

    A picker that is still unset yields ``None``; in that case the default
    bound is kept, so the later ``>=`` / ``<=`` comparison against the column
    never receives ``None``.
    """
    low, high = table['Date'].min(), table['Date'].max()
    if mode == 'Specific Dates':
        picked_low = st.date_input('Min Date:', value=None, format="YYYY-MM-DD", key=f'{key_prefix}low_date')
        picked_high = st.date_input('Max Date:', value=None, format="YYYY-MM-DD", key=f'{key_prefix}high_date')
        if picked_low is not None:
            low = pd.to_datetime(picked_low).date()
        if picked_high is not None:
            high = pd.to_datetime(picked_high).date()
    return low, high


def _filter_gamelog(table, low, high, teams=None, players=None):
    """Return the rows of ``table`` inside ``[low, high]`` for the given teams/players.

    ``teams`` and ``players`` are optional collections; a filter left as
    ``None`` is not applied. The result is a new frame; ``table`` is untouched.
    """
    keep = (table['Date'] >= low) & (table['Date'] <= high)
    if teams is not None:
        keep &= table['Team'].isin(teams)
    if players is not None:
        keep &= table['Player'].isin(players)
    return table[keep]


def _show_summary(summary, index_col, shown_cols, hidden_cols, display, label, file_name):
    """Render an aggregated (season or team) table plus its CSV download button.

    ``summary`` is re-indexed by ``index_col``. The on-screen table shows
    ``shown_cols`` minus ``hidden_cols`` (same outcome as reindexing to
    ``shown_cols`` and then dropping ``hidden_cols``); the download contains
    the full re-indexed table.
    """
    summary = summary.set_index(index_col)
    visible = [col for col in shown_cols if col not in hidden_cols]
    display.dataframe(summary.reindex(columns=visible).style.format(precision=2), height=750, use_container_width=True)
    st.download_button(
        label=label,
        data=convert_df_to_csv(summary),
        file_name=file_name,
        mime='text/csv',
    )


def _show_gamelog(gamelog_data, key_prefix, label, file_name):
    """Render ``gamelog_data`` one page at a time with page-size/page controls.

    ``key_prefix`` namespaces the widget keys (``<prefix>_pagination`` for the
    page size, ``<prefix>_page`` for the page number) so the hitter and pitcher
    tabs, which are both rendered on every run, never create two "Page" inputs
    with the same identity.
    """
    display = st.container()
    status_col, page_col, size_col = st.columns((4, 1, 1))
    with size_col:
        batch_size = st.selectbox("Page Size", options=[25, 50, 100], key=f'{key_prefix}_pagination')
    # split_frame is defined elsewhere in this file; its result is indexed by
    # page number below, so its length is taken as the real page count. The
    # previous floor division could make a final partial page unreachable.
    pages = split_frame(gamelog_data, batch_size)
    total_pages = max(len(pages), 1)
    page_key = f'{key_prefix}_page'
    # A remembered page can exceed the new maximum after the filters or the
    # page size change; clamp it before the widget is created.
    if st.session_state.get(page_key, 1) > total_pages:
        st.session_state[page_key] = total_pages
    with page_col:
        current_page = st.number_input("Page", min_value=1, max_value=total_pages, step=1, key=page_key)
    with status_col:
        st.markdown(f"Page **{current_page}** of **{total_pages}** ")
    if len(pages) == 0:
        # Nothing matched the filters: there is no page to index into.
        display.info("No rows match the current filters.")
        return
    display.dataframe(data=pages[current_page - 1].style.format(precision=2), height=500, use_container_width=True)
    st.download_button(
        label=label,
        data=convert_df_to_csv(gamelog_data),
        file_name=file_name,
        mime='text/csv',
    )


def _rolling_mean(frame, stats, window):
    """Return the time-based rolling mean of ``stats`` as a frame with a ``Date`` column.

    ``window`` is a pandas offset string such as ``'5D'``. Work is done on a
    copy so the caller's (filtered) frame is not written to, and the index is
    sorted by date before the offset window is applied.
    """
    frame = frame.copy()
    frame['Date'] = pd.to_datetime(frame['Date'])
    frame = frame.set_index('Date').sort_index()
    return frame[stats].rolling(window).mean().reset_index()


def _plot_stats(graph_data, stats):
    """Draw ``stats`` against ``Date`` and show the chart as an image.

    With one stat only the left axis is used. With two or three stats the last
    one is drawn on a twin (right) axis and the others share the left axis.
    The figure is closed once it has been saved so figures do not pile up
    across script reruns.
    """
    left_stats = stats[:-1] if len(stats) > 1 else stats
    right_stat = stats[-1] if len(stats) > 1 else None
    fig, ax1 = plt.subplots(figsize=(20, 10), layout='tight')
    try:
        ax1.set_xlabel('Date')
        ax1.set_ylabel(" / ".join(left_stats), color='tab:blue')
        for stat, line_color in zip(left_stats, ('tab:blue', 'tab:orange')):
            ax1.plot(graph_data['Date'], graph_data[stat], color=line_color)
        ax1.tick_params(axis='y', labelcolor='tab:blue')
        if right_stat is not None:
            ax2 = ax1.twinx()
            ax2.set_ylabel(right_stat, color='tab:green')
            ax2.plot(graph_data['Date'], graph_data[right_stat], color='tab:green')
            ax2.tick_params(axis='y', labelcolor='tab:green')
            fig.legend(stats, loc="upper right")
        buf = BytesIO()
        fig.savefig(buf, format="png")
    finally:
        plt.close(fig)
    st.image(buf)


# ----------------------------- Hitter tab (continued) -----------------------------
total_teams = _unique_values(hitter_gamelog_table, 'Team')
total_hitters = _unique_values(hitter_gamelog_table, 'Player')
total_pitchers = _unique_values(pitcher_gamelog_table, 'Player')
total_dates = hitter_gamelog_table.Date.values.tolist()

with col1:
    split_var1 = st.radio("What table would you like to view?", ('Season Logs', 'Game logs', 'Team Logs'), key='split_var1')
    split_var2 = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key='split_var2')
    if split_var2 == 'Specific Teams':
        team_var1 = st.multiselect('Which teams would you like to include in the tables?', options = total_teams, key='team_var1')
    else:
        team_var1 = total_teams
    split_var3 = st.radio("Would you like to view all dates or specific ones?", ('All', 'Specific Dates'), key='split_var3')
    low_date, high_date = _pick_date_range(hitter_gamelog_table, split_var3, '')
    split_var4 = st.radio("Would you like to view all players or specific ones?", ('All', 'Specific Players'), key='split_var4')
    if split_var4 == 'Specific Players':
        player_var1 = st.multiselect('Which players would you like to include in the tables?', options = total_hitters, key='player_var1')
    else:
        player_var1 = total_hitters

with col2:
    if split_var1 == 'Season Logs':
        with st.container():
            choose_disp = st.multiselect('Which stats would you like to view?', options = season_hitter_data_cols, default = season_hitter_data_cols, key='col_display')
        display = st.container()
        working_data = _filter_gamelog(hitter_gamelog_table, low_date, high_date, team_var1, player_var1)
        _show_summary(
            hitter_seasonlong_build(working_data), 'Player',
            basic_season_cols + choose_disp, ['Player', 'Date'],
            display, "Export hitter seasonlogs Model", 'Seasonlogs_Hitter_View.csv',
        )
    elif split_var1 == 'Team Logs':
        with st.container():
            choose_disp = st.multiselect('Which stats would you like to view?', options = season_hitter_data_cols, default = season_hitter_data_cols, key='col_display')
        display = st.container()
        # Team totals are filtered by date and team only, not by player.
        working_data = _filter_gamelog(hitter_gamelog_table, low_date, high_date, team_var1)
        # The export used to reuse the season-log file name; give it its own.
        _show_summary(
            hitter_team_build(working_data), 'Team',
            basic_season_cols + choose_disp, ['Team', 'Date', 'Player'],
            display, "Export hitter team logs Model", 'Teamlogs_Hitter_View.csv',
        )
    elif split_var1 == 'Game logs':
        with st.container():
            choose_disp_gamelog = st.multiselect('Which stats would you like to view?', options = hitter_data_cols, default = hitter_data_cols, key='choose_disp_gamelog')
        working_data = _filter_gamelog(hitter_gamelog_table, low_date, high_date, team_var1, player_var1).reset_index(drop=True)
        gamelog_data = working_data.reindex(basic_cols + choose_disp_gamelog, axis="columns")
        _show_gamelog(gamelog_data, 'hitter', "Export hitter gamelogs model", 'Gamelogs_Hitter_View.csv')

# ----------------------------------- Pitcher tab -----------------------------------
with tab2:
    st.info(t_stamp)
    col1, col2 = st.columns([1, 9])
    with col1:
        if st.button("Reset Data", key='reset2'):
            st.cache_data.clear()
            hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
            t_stamp = "Updated through: " + str(timestamp) + " CST"
        # Recomputed after the optional reset so the selectors reflect the
        # tables currently loaded.
        total_teams = _unique_values(hitter_gamelog_table, 'Team')
        total_hitters = _unique_values(hitter_gamelog_table, 'Player')
        total_pitchers = _unique_values(pitcher_gamelog_table, 'Player')
        total_dates = hitter_gamelog_table.Date.values.tolist()

        sp_split_var1 = st.radio("What table would you like to view?", ('Season Logs', 'Gamelogs'), key='sp_split_var1')
        sp_split_var2 = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key='sp_split_var2')
        if sp_split_var2 == 'Specific Teams':
            sp_team_var1 = st.multiselect('Which teams would you like to include in the tables?', options = total_teams, key='sp_team_var1')
        else:
            sp_team_var1 = total_teams
        sp_split_var3 = st.radio("Would you like to view all dates or specific ones?", ('All', 'Specific Dates'), key='sp_split_var3')
        sp_low_date, sp_high_date = _pick_date_range(pitcher_gamelog_table, sp_split_var3, 'sp_')
        sp_split_var4 = st.radio("Would you like to view all players or specific ones?", ('All', 'Specific Players'), key='sp_split_var4')
        if sp_split_var4 == 'Specific Players':
            sp_player_var1 = st.multiselect('Which players would you like to include in the tables?', options = total_pitchers, key='sp_player_var1')
        else:
            sp_player_var1 = total_pitchers

    with col2:
        if sp_split_var1 == 'Season Logs':
            with st.container():
                sp_choose_disp = st.multiselect('Which stats would you like to view?', options = season_pitcher_data_cols, default = season_pitcher_data_cols, key='sp_col_display')
            display = st.container()
            working_data = _filter_gamelog(pitcher_gamelog_table, sp_low_date, sp_high_date, sp_team_var1, sp_player_var1)
            _show_summary(
                pitcher_seasonlong_build(working_data), 'Player',
                basic_season_cols + sp_choose_disp, ['Player', 'Date'],
                display, "Export pitcher seasonlogs Model", 'Seasonlogs_Pitcher_View.csv',
            )
        elif sp_split_var1 == 'Gamelogs':
            with st.container():
                sp_choose_disp_gamelog = st.multiselect('Which stats would you like to view?', options = pitcher_data_cols, default = pitcher_data_cols, key='sp_choose_disp_gamelog')
            working_data = _filter_gamelog(pitcher_gamelog_table, sp_low_date, sp_high_date, sp_team_var1, sp_player_var1).reset_index(drop=True)
            gamelog_data = working_data.reindex(basic_cols + sp_choose_disp_gamelog, axis="columns")
            # The pitcher export used to be saved under the hitter file name.
            _show_gamelog(gamelog_data, 'pitcher', "Export pitcher gamelogs model", 'Gamelogs_Pitcher_View.csv')

# ------------------------------------ Graph tab ------------------------------------
with tab3:
    st.info(t_stamp)
    st.info("Note when creating graphs with multiple stats: The LEFT y-axis will be locked to values of the first stat you choose, while the RIGHT y-axis will be locked to the values of the second or third stat you chose depending on wether you are viewing Two or Three stats. So, to maximize the use of the graphs, you'll want to make sure that you are using compatible stats. I.E. use percentages together like GB% and FB% or average based stats like AVG and BABIP")
    col1, col2, col3 = st.columns([1, 7, 2])
    with col1:
        if st.button("Reset Data", key='reset3'):
            st.cache_data.clear()
            hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
            t_stamp = "Updated through: " + str(timestamp) + " CST"
        total_teams = _unique_values(hitter_gamelog_table, 'Team')
        total_hitters = _unique_values(hitter_gamelog_table, 'Player')
        total_pitchers = _unique_values(pitcher_gamelog_table, 'Player')
        total_dates = hitter_gamelog_table.Date.values.tolist()

        plot_type = st.radio("Are you viewing hitter or pitcher stats?", ('Pitcher', 'Hitter'), key='plot_type')
        if plot_type == "Pitcher":
            player_drop = total_pitchers
            stat_drop = pitcher_data_cols
            working_data = pitcher_gamelog_table
        else:
            player_drop = total_hitters
            stat_drop = hitter_data_cols
            working_data = hitter_gamelog_table

        player_var3 = st.selectbox("Which player are you viewing?", player_drop, key='player_var3')
        plot_count = st.radio("how many stats would you like to plot?", ('One', 'Two', 'Three'), key='plot_count')
        # One select box per requested stat, keyed plot_var1 .. plot_var3.
        plot_stats = []
        for stat_number in range(1, {'One': 1, 'Two': 2, 'Three': 3}[plot_count] + 1):
            plot_stats.append(st.selectbox("Which stat are you viewing?", stat_drop, key=f'plot_var{stat_number}'))

        date_var_3 = st.radio("Would you like to view all dates or specific ones?", ('All', 'Specific Dates', '5-day Averages', '10-day Averages'), key='date_var_3')
        # Bounds come from whichever table (hitter or pitcher) is being
        # plotted, not always from the pitcher table.
        plot_low_date, plot_high_date = _pick_date_range(working_data, date_var_3, 'plot_')

    with col2:
        # Filter with this tab's own date range. The pitcher tab's date and
        # team selections used to leak in here, so the dates picked above were
        # ignored and an unrelated team filter could blank the chart.
        working_data = _filter_gamelog(working_data, plot_low_date, plot_high_date, players=[player_var3])
        # Picking the same stat twice must not create duplicate columns.
        stat_cols = list(dict.fromkeys(plot_stats))
        rolling_windows = {'5-day Averages': '5D', '10-day Averages': '10D'}
        if date_var_3 in rolling_windows:
            working_data = _rolling_mean(working_data, stat_cols, rolling_windows[date_var_3])
        graph_data = working_data.reindex(['Date'] + stat_cols, axis="columns")
        disp_data = working_data
        _plot_stats(graph_data, plot_stats)

    with col3:
        st.table(disp_data)