Spaces:
Sleeping
Sleeping
import streamlit as st | |
st.set_page_config(layout="wide") | |
# Wipe every module-level name that does not start with an underscore.
# This also removes the `st` import made above, which is why streamlit is
# imported a second time further down.
for name in dir():
    if name.startswith('_'):
        continue
    del globals()[name]
import numpy as np | |
import pandas as pd | |
import streamlit as st | |
import gspread | |
import gc | |
import plotly.express as px | |
import plotly.io as pio | |
import pymongo | |
import certifi | |
ca = certifi.where() | |
def init_conn():
    """Open the Google Sheets and MongoDB connections used by the app.

    Credentials are read from Streamlit secrets rather than being embedded
    in the source file. The service-account private key and the MongoDB
    password that used to be hard-coded here were exposed in a public file
    and must be treated as compromised: revoke/rotate them before deploying.

    Required secrets:
        gcp_service_account: table holding the fields of the Google
            service-account JSON key ("type", "project_id", "private_key",
            "client_email", ...).
        mongo_uri: full MongoDB connection string, including credentials.

    Returns:
        tuple: ``(gc_con, client, db)`` - the gspread client, the
        ``pymongo.MongoClient`` and its ``testing_db`` database handle.

    Raises:
        KeyError: if either secret is not configured.
    """
    scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']

    # Copy into a plain dict: gspread expects a regular mapping of key fields.
    credentials = dict(st.secrets["gcp_service_account"])

    client = pymongo.MongoClient(st.secrets["mongo_uri"])
    db = client["testing_db"]

    gc_con = gspread.service_account_from_dict(credentials, scope)
    return gc_con, client, db
# Establish the Sheets / MongoDB handles shared by the rest of the script.
gcservice_account, client, db = init_conn()

# Two-decimal percent format string for each of these column names.
percentages_format = dict.fromkeys(['PG', 'SG', 'SF', 'PF', 'C'], '{:.2%}')
def _clean_gamelog(raw_display, stat_cols, renames, blank_to_nan=False):
    """Select, type-convert and rename the game-log columns of one collection dump."""
    # Work on an explicit copy so the assignments below never write into a
    # view of `raw_display` (avoids pandas' SettingWithCopyWarning).
    table = raw_display[['NameASCII', 'Team', 'Date'] + stat_cols].copy()
    if blank_to_nan:
        table = table.replace("", np.nan)
    table['Date'] = pd.to_datetime(table['Date']).dt.date
    # Anything that cannot be parsed as a number becomes NaN.
    table[stat_cols] = table[stat_cols].apply(pd.to_numeric, errors='coerce')
    return table.rename(columns=renames)


def init_baselines():
    """Load the hitter and pitcher game logs from MongoDB.

    Reads every document of the ``MLB_Hitters_DB`` and ``MLB_Pitchers_DB``
    collections through the module-level ``db`` handle, keeps the stat
    columns listed below, converts ``Date`` to ``datetime.date`` and the
    stat columns to numbers, and renames ``NameASCII`` to ``Player`` (the
    pitcher velocity columns also lose their ``" (sc)"`` suffix).

    Returns:
        tuple: ``(hitter_gamelog_table, pitcher_gamelog_table, timestamp)``
        where ``timestamp`` is the latest ``Date`` in the pitcher table.

    Raises:
        KeyError: if a collection is missing any of the expected columns
            (including when it is empty).
    """
    hitter_stats = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                    'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%']
    velocity_cols = ['vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI', 'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN']
    pitcher_stats = ['G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                     'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%',
                     'BB%', 'SIERA', 'LD%', 'GB%', 'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA']
    pitcher_stats += [f'{col} (sc)' for col in velocity_cols]

    hitter_raw = pd.DataFrame(list(db["MLB_Hitters_DB"].find()))
    hitter_gamelog_table = _clean_gamelog(hitter_raw, hitter_stats, {'NameASCII': 'Player'})

    pitcher_renames = {'NameASCII': 'Player'}
    pitcher_renames.update({f'{col} (sc)': col for col in velocity_cols})
    pitcher_raw = pd.DataFrame(list(db["MLB_Pitchers_DB"].find()))
    # Only the pitcher table had blanks mapped to NaN in every column
    # (including Team/Date) before conversion; keep that distinction.
    pitcher_gamelog_table = _clean_gamelog(pitcher_raw, pitcher_stats, pitcher_renames, blank_to_nan=True)

    timestamp = pitcher_gamelog_table['Date'].max()
    return hitter_gamelog_table, pitcher_gamelog_table, timestamp
def hitter_seasonlong_build(data_sample):
    """Aggregate hitter game logs into one season-long row per player.

    Counting stats are summed and rate stats are averaged (unweighted mean
    of the per-game values) within each ``(Player, Team)`` group. Only the
    first row seen for each ``Player`` is kept, and the result is sorted by
    ``Avg wRC+`` in descending order.

    Args:
        data_sample: hitter game-log DataFrame containing ``Player``,
            ``Team`` and the stat columns named below. It is not modified.

    Returns:
        DataFrame with ``Player``, ``Team``, the summed counting stats and
        the ``Avg ...`` rate columns, in that order.
    """
    counting_stats = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                      'GDP', 'SB', 'CS']
    rate_stats = ['AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%']

    grouped = data_sample.groupby(['Player', 'Team'], sort=False)

    # Explicit copy: the columns added below must not be written into a
    # slice of the caller's frame (avoids SettingWithCopyWarning).
    season_long_table = data_sample[['Player', 'Team']].copy()

    for col in counting_stats:
        season_long_table[col] = grouped[col].transform('sum').astype(int)
    for col in rate_stats:
        season_long_table[f'Avg {col}'] = grouped[col].transform('mean').astype(float)
    season_long_table['Barrels'] = grouped['Barrels'].transform('sum').astype(int)
    season_long_table['Avg Barrel%'] = grouped['Barrel%'].transform('mean').astype(float)

    # transform() repeats the group totals on every game row; collapse to
    # a single row per player.
    season_long_table = season_long_table.drop_duplicates(subset='Player')
    return season_long_table.sort_values(by='Avg wRC+', ascending=False)
def pitcher_seasonlong_build(data_sample):
    """Aggregate pitcher game logs into one season-long row per player.

    Counting stats are summed and rate/velocity stats are averaged
    (unweighted mean of the per-game values) within each
    ``(Player, Team)`` group. Only the first row seen for each ``Player``
    is kept, and the result is sorted by ``SO`` in descending order.

    Args:
        data_sample: pitcher game-log DataFrame containing ``Player``,
            ``Team`` and the stat columns named below. It is not modified.

    Returns:
        DataFrame with ``Player``, ``Team``, the summed counting stats and
        the ``Avg ...`` columns, in the order listed in ``aggregations``.
    """
    # (source column, aggregation) in output-column order.
    aggregations = (
        [(col, 'sum') for col in ['G', 'GS', 'CG', 'W', 'L']]
        + [('ERA', 'mean')]
        # NOTE(review): the summed IP is truncated to an integer like the
        # other counting stats - confirm that dropping partial innings is intended.
        + [(col, 'sum') for col in ['ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                                    'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO']]
        + [(col, 'mean') for col in ['K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA',
                                     'LD%', 'GB%', 'FB%', 'HR/FB', 'Hard%']]
        + [('Barrels', 'sum')]
        + [(col, 'mean') for col in ['Barrel%', 'xERA', 'vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI',
                                     'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN']]
    )

    grouped = data_sample.groupby(['Player', 'Team'], sort=False)

    # Explicit copy: the columns added below must not be written into a
    # slice of the caller's frame (avoids SettingWithCopyWarning).
    season_long_table = data_sample[['Player', 'Team']].copy()

    for col, how in aggregations:
        if how == 'sum':
            season_long_table[col] = grouped[col].transform('sum').astype(int)
        else:
            # Every average stays float. 'Avg LOB%' used to be cast to int,
            # which truncated the mean and raised when the mean was NaN.
            season_long_table[f'Avg {col}'] = grouped[col].transform('mean').astype(float)

    # transform() repeats the group totals on every game row; collapse to
    # a single row per player.
    season_long_table = season_long_table.drop_duplicates(subset='Player')
    return season_long_table.sort_values(by='SO', ascending=False)
def split_frame(input_df, rows):
    """Split *input_df* into consecutive pages of at most *rows* rows.

    Slicing is positional, so the result does not depend on the frame's
    index labels (a filtered or non-integer index pages correctly).

    Args:
        input_df: DataFrame to paginate.
        rows: maximum number of rows per page; must be positive.

    Returns:
        list of DataFrames. An empty input yields a single empty page so
        callers can always index ``pages[0]``.
    """
    if len(input_df) == 0:
        return [input_df]
    return [input_df.iloc[start:start + rows] for start in range(0, len(input_df), rows)]
def convert_df_to_csv(df):
    """Render *df* as CSV text (index included) and return it as UTF-8 bytes."""
    csv_text = df.to_csv()
    return bytes(csv_text, 'utf-8')
# Load both game-log tables and the "data through" date for this script run.
hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
t_stamp = f"Updated through: {timestamp} CST"

# Identifier columns shown ahead of the user-selected stats.
basic_cols = ['Player', 'Team']
basic_season_cols = ['Player', 'Team']

# Stat columns offered in the column pickers (per-game and season-long views).
hitter_data_cols = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                    'GDP', 'SB', 'CS', 'AVG', 'SLG', 'wRC+', 'LD%', 'GB%', 'FB%', 'Hard%', 'Barrels', 'Barrel%']
season_hitter_data_cols = ['G', 'AB', 'PA', 'H', '1B', '2B', '3B', 'HR', 'R', 'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SF', 'SH',
                           'GDP', 'SB', 'CS', 'Avg AVG', 'Avg SLG', 'Avg wRC+', 'Avg LD%', 'Avg GB%', 'Avg FB%', 'Avg Hard%', 'Barrels', 'Avg Barrel%']
pitcher_data_cols = ['G', 'GS', 'CG', 'W', 'L', 'ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                     'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'K/9', 'BB/9', 'WHIP', 'BABIP', 'LOB%', 'FIP', 'xFIP', 'K%', 'BB%', 'SIERA', 'LD%', 'GB%',
                     'FB%', 'HR/FB', 'Hard%', 'Barrels', 'Barrel%', 'xERA', 'vFA', 'vFT', 'vFC', 'vFS', 'vFO', 'vSI',
                     'vSL', 'vCU', 'vKC', 'vEP', 'vCH', 'vSC', 'vKN']
season_pitcher_data_cols = ['G', 'GS', 'CG', 'W', 'L', 'Avg ERA', 'ShO', 'SV', 'HLD', 'BS', 'IP', 'TBF', 'H', 'R', 'ER', 'HR',
                            'BB', 'IBB', 'HBP', 'WP', 'BK', 'SO', 'Avg K/9', 'Avg BB/9', 'Avg WHIP', 'Avg BABIP', 'Avg LOB%', 'Avg FIP', 'Avg xFIP', 'Avg K%',
                            'Avg BB%', 'Avg SIERA', 'Avg LD%', 'Avg GB%', 'Avg FB%', 'Avg HR/FB', 'Avg Hard%', 'Barrels', 'Avg Barrel%', 'Avg xERA', 'Avg vFA',
                            'Avg vFT', 'Avg vFC', 'Avg vFS', 'Avg vFO', 'Avg vSI', 'Avg vSL', 'Avg vCU', 'Avg vKC', 'Avg vEP', 'Avg vCH', 'Avg vSC', 'Avg vKN']

# Distinct teams / players, in order of first appearance, for the filter widgets.
indv_teams = hitter_gamelog_table.drop_duplicates(subset='Team')
total_teams = indv_teams.Team.values.tolist()
indv_hitters = hitter_gamelog_table.drop_duplicates(subset='Player')
total_hitters = indv_hitters.Player.values.tolist()
# The pitcher list must come from the pitcher table; it was previously
# built from the hitter table and therefore contained hitter names.
indv_pitchers = pitcher_gamelog_table.drop_duplicates(subset='Player')
total_pitchers = indv_pitchers.Player.values.tolist()
total_dates = hitter_gamelog_table.Date.values.tolist()
tab1, tab2 = st.tabs(['Hitter Gamelogs', 'Pitcher Gamelogs'])

# ---------------------------------------------------------------------------
# Hitter tab: filter controls in the narrow left column, table on the right.
# ---------------------------------------------------------------------------
with tab1:
    st.info(t_stamp)
    col1, col2 = st.columns([1, 9])
    with col1:
        if st.button("Reset Data", key='reset1'):
            # Reload the tables and everything derived from them. The
            # column-name constants do not depend on the data, so they are
            # not redefined here.
            st.cache_data.clear()
            hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
            t_stamp = f"Updated through: {timestamp} CST"
            indv_teams = hitter_gamelog_table.drop_duplicates(subset='Team')
            total_teams = indv_teams.Team.values.tolist()
            indv_hitters = hitter_gamelog_table.drop_duplicates(subset='Player')
            total_hitters = indv_hitters.Player.values.tolist()
            indv_pitchers = pitcher_gamelog_table.drop_duplicates(subset='Player')
            total_pitchers = indv_pitchers.Player.values.tolist()
            total_dates = hitter_gamelog_table.Date.values.tolist()

        split_var1 = st.radio("What table would you like to view?", ('Season Logs', 'Gamelogs'), key='split_var1')

        split_var2 = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key='split_var2')
        if split_var2 == 'Specific Teams':
            team_var1 = st.multiselect('Which teams would you like to include in the tables?', options = total_teams, key='team_var1')
        else:
            team_var1 = total_teams

        split_var3 = st.radio("Would you like to view all dates or specific ones?", ('All', 'Specific Dates'), key='split_var3')
        if split_var3 == 'Specific Dates':
            low_date = st.date_input('Min Date:', value=None, format="YYYY-MM-DD", key='low_date')
            high_date = st.date_input('Max Date:', value=None, format="YYYY-MM-DD", key='high_date')
            # date_input returns None until a date is picked; comparing the
            # Date column against None raises, so fall back to the full range.
            if low_date is None:
                low_date = hitter_gamelog_table['Date'].min()
            else:
                low_date = pd.to_datetime(low_date).date()
            if high_date is None:
                high_date = hitter_gamelog_table['Date'].max()
            else:
                high_date = pd.to_datetime(high_date).date()
        else:
            low_date = hitter_gamelog_table['Date'].min()
            high_date = hitter_gamelog_table['Date'].max()

        split_var4 = st.radio("Would you like to view all players or specific ones?", ('All', 'Specific Players'), key='split_var4')
        if split_var4 == 'Specific Players':
            player_var1 = st.multiselect('Which players would you like to include in the tables?', options = total_hitters, key='player_var1')
        else:
            player_var1 = total_hitters

    with col2:
        # The same date / team / player filters apply to both views.
        working_data = hitter_gamelog_table
        working_data = working_data[working_data['Date'] >= low_date]
        working_data = working_data[working_data['Date'] <= high_date]
        working_data = working_data[working_data['Team'].isin(team_var1)]
        working_data = working_data[working_data['Player'].isin(player_var1)]

        if split_var1 == 'Season Logs':
            choose_cols = st.container()
            with choose_cols:
                choose_disp = st.multiselect('Which stats would you like to view?', options = season_hitter_data_cols, default = season_hitter_data_cols, key='col_display')
            display = st.container()

            season_long_table = hitter_seasonlong_build(working_data)
            season_long_table = season_long_table.set_index('Player')
            # 'Player' is the index now; reindexing it as a column would
            # add an all-NaN 'Player' column to the display.
            disp_stats = [col for col in basic_season_cols if col != 'Player'] + choose_disp
            season_long_table_disp = season_long_table.reindex(disp_stats, axis="columns")
            display.dataframe(season_long_table_disp.style.format(precision=2), height=750, use_container_width = True)
            st.download_button(
                label="Export hitter seasonlogs Model",
                data=convert_df_to_csv(season_long_table),
                file_name='Seasonlogs_Hitter_View.csv',
                mime='text/csv',
            )

        elif split_var1 == 'Gamelogs':
            choose_cols = st.container()
            with choose_cols:
                choose_disp_gamelog = st.multiselect('Which stats would you like to view?', options = hitter_data_cols, default = hitter_data_cols, key='choose_disp_gamelog')
            gamelog_disp_stats = basic_cols + choose_disp_gamelog

            working_data = working_data.reset_index(drop=True)
            gamelog_data = working_data.reindex(gamelog_disp_stats, axis="columns")

            display = st.container()
            bottom_menu = st.columns((4, 1, 1))
            with bottom_menu[2]:
                # Explicit keys keep these widgets distinct from the
                # identically-configured ones on the pitcher tab.
                batch_size = st.selectbox("Page Size", options=[25, 50, 100], key='hitter_page_size')
            with bottom_menu[1]:
                # Round up so a final partial page stays reachable; always
                # offer at least one page.
                total_pages = max(1, (len(gamelog_data) + batch_size - 1) // batch_size)
                current_page = st.number_input(
                    "Page", min_value=1, max_value=total_pages, step=1, key='hitter_page'
                )
            with bottom_menu[0]:
                st.markdown(f"Page **{current_page}** of **{total_pages}** ")

            pages = split_frame(gamelog_data, batch_size)
            # With no matching rows there may be no page to index; show the
            # (empty) frame instead of failing.
            page_frame = pages[current_page - 1] if len(pages) >= current_page else gamelog_data
            display.dataframe(data=page_frame.style.format(precision=2), height=500, use_container_width=True)
            st.download_button(
                label="Export hitter gamelogs model",
                data=convert_df_to_csv(gamelog_data),
                file_name='Gamelogs_Hitter_View.csv',
                mime='text/csv',
            )
# ---------------------------------------------------------------------------
# Pitcher tab: filter controls in the narrow left column, table on the right.
# ---------------------------------------------------------------------------
with tab2:
    st.info(t_stamp)
    col1, col2 = st.columns([1, 9])
    with col1:
        if st.button("Reset Data", key='reset2'):
            # Reload the tables and everything derived from them. The
            # column-name constants do not depend on the data, so they are
            # not redefined here.
            st.cache_data.clear()
            hitter_gamelog_table, pitcher_gamelog_table, timestamp = init_baselines()
            t_stamp = f"Updated through: {timestamp} CST"
            indv_teams = hitter_gamelog_table.drop_duplicates(subset='Team')
            total_teams = indv_teams.Team.values.tolist()
            indv_hitters = hitter_gamelog_table.drop_duplicates(subset='Player')
            total_hitters = indv_hitters.Player.values.tolist()
            indv_pitchers = pitcher_gamelog_table.drop_duplicates(subset='Player')
            total_pitchers = indv_pitchers.Player.values.tolist()
            total_dates = hitter_gamelog_table.Date.values.tolist()

        # Player choices for this tab come from the pitcher table itself;
        # offering hitter names here filtered out nearly every pitcher row.
        pitcher_names = pitcher_gamelog_table.drop_duplicates(subset='Player').Player.values.tolist()

        split_var1 = st.radio("What table would you like to view?", ('Season Logs', 'Gamelogs'), key='sp_split_var1')

        split_var2 = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key='sp_split_var2')
        if split_var2 == 'Specific Teams':
            team_var1 = st.multiselect('Which teams would you like to include in the tables?', options = total_teams, key='sp_team_var1')
        else:
            team_var1 = total_teams

        split_var3 = st.radio("Would you like to view all dates or specific ones?", ('All', 'Specific Dates'), key='sp_split_var3')
        if split_var3 == 'Specific Dates':
            low_date = st.date_input('Min Date:', value=None, format="YYYY-MM-DD", key='sp_low_date')
            high_date = st.date_input('Max Date:', value=None, format="YYYY-MM-DD", key='sp_high_date')
            # date_input returns None until a date is picked; comparing the
            # Date column against None raises, so fall back to the full range.
            if low_date is None:
                low_date = pitcher_gamelog_table['Date'].min()
            else:
                low_date = pd.to_datetime(low_date).date()
            if high_date is None:
                high_date = pitcher_gamelog_table['Date'].max()
            else:
                high_date = pd.to_datetime(high_date).date()
        else:
            low_date = pitcher_gamelog_table['Date'].min()
            high_date = pitcher_gamelog_table['Date'].max()

        split_var4 = st.radio("Would you like to view all players or specific ones?", ('All', 'Specific Players'), key='sp_split_var4')
        if split_var4 == 'Specific Players':
            player_var1 = st.multiselect('Which players would you like to include in the tables?', options = pitcher_names, key='sp_player_var1')
        else:
            player_var1 = pitcher_names

    with col2:
        # The same date / team / player filters apply to both views.
        working_data = pitcher_gamelog_table
        working_data = working_data[working_data['Date'] >= low_date]
        working_data = working_data[working_data['Date'] <= high_date]
        working_data = working_data[working_data['Team'].isin(team_var1)]
        working_data = working_data[working_data['Player'].isin(player_var1)]

        if split_var1 == 'Season Logs':
            choose_cols = st.container()
            with choose_cols:
                # Offer the pitcher season columns; the hitter list used
                # before produced NaN columns for stats pitchers do not have.
                choose_disp = st.multiselect('Which stats would you like to view?', options = season_pitcher_data_cols, default = season_pitcher_data_cols, key='sp_col_display')
            display = st.container()

            season_long_table = pitcher_seasonlong_build(working_data)
            season_long_table = season_long_table.set_index('Player')
            # 'Player' is the index now; reindexing it as a column would
            # add an all-NaN 'Player' column to the display.
            disp_stats = [col for col in basic_season_cols if col != 'Player'] + choose_disp
            season_long_table_disp = season_long_table.reindex(disp_stats, axis="columns")
            display.dataframe(season_long_table_disp.style.format(precision=2), height=750, use_container_width = True)
            st.download_button(
                label="Export pitcher seasonlogs Model",
                data=convert_df_to_csv(season_long_table),
                file_name='Seasonlogs_Pitcher_View.csv',
                mime='text/csv',
            )

        elif split_var1 == 'Gamelogs':
            choose_cols = st.container()
            with choose_cols:
                # Pitcher per-game columns (previously the hitter list).
                choose_disp_gamelog = st.multiselect('Which stats would you like to view?', options = pitcher_data_cols, default = pitcher_data_cols, key='sp_choose_disp_gamelog')
            gamelog_disp_stats = basic_cols + choose_disp_gamelog

            working_data = working_data.reset_index(drop=True)
            gamelog_data = working_data.reindex(gamelog_disp_stats, axis="columns")

            display = st.container()
            bottom_menu = st.columns((4, 1, 1))
            with bottom_menu[2]:
                # Explicit keys keep these widgets distinct from the
                # identically-configured ones on the hitter tab.
                batch_size = st.selectbox("Page Size", options=[25, 50, 100], key='sp_page_size')
            with bottom_menu[1]:
                # Round up so a final partial page stays reachable; always
                # offer at least one page.
                total_pages = max(1, (len(gamelog_data) + batch_size - 1) // batch_size)
                current_page = st.number_input(
                    "Page", min_value=1, max_value=total_pages, step=1, key='sp_page'
                )
            with bottom_menu[0]:
                st.markdown(f"Page **{current_page}** of **{total_pages}** ")

            pages = split_frame(gamelog_data, batch_size)
            # With no matching rows there may be no page to index; show the
            # (empty) frame instead of failing.
            page_frame = pages[current_page - 1] if len(pages) >= current_page else gamelog_data
            display.dataframe(data=page_frame.style.format(precision=2), height=500, use_container_width=True)
            st.download_button(
                label="Export pitcher gamelogs model",
                data=convert_df_to_csv(gamelog_data),
                # Was 'Gamelogs_Hitter_View.csv', which mislabeled the pitcher export.
                file_name='Gamelogs_Pitcher_View.csv',
                mime='text/csv',
            )