import plotly.express as px import plotly.graph_objects as go import plotly.colors as pc from scipy.stats import gaussian_kde import numpy as np import pandas as pd import gradio as gr from translate import max_pitch_types from data import df, pitch_stats, league_pitch_stats # GRADIO FUNCTIONS # location maps def fit_pred_kde(data, X, Y): kde = gaussian_kde(data) return kde(np.stack((X, Y)).reshape(2, -1)).reshape(*X.shape) plot_s = 256 sz_h = 200 sz_w = 160 h_h = 200 - 40*2 h_w = 160 - 32*2 kde_range = np.arange(-plot_s/2, plot_s/2, 1) X, Y = np.meshgrid( kde_range, kde_range ) def coordinatify(h, w): return dict( x0=-w/2, y0=-h/2, x1=w/2, y1=h/2 ) colorscale = pc.sequential.OrRd colorscale = [ [0, 'rgba(0, 0, 0, 0)'], ] + [ [i / len(colorscale), color] for i, color in enumerate(colorscale, start=1) ] def plot_pitch_map(player=None, loc=None, pitch_type=None, pitch_name=None, all_pitches=False, min_pitches=2): assert not ((loc is None and player is None) or (loc is not None and player is not None)), 'exactly one of `player` or `loc` must be specified' if loc is None and player is not None: if all_pitches: assert not (pitch_type is not None or pitch_name is not None), 'cannot have `pitch_type` or `pitch_name` when `all_pitches` is `True`' loc = df.sort_values('name').set_index('name').loc[player, ['plate_x', 'plate_z']] else: assert not ((pitch_type is None and pitch_name is None) or (pitch_type is not None and pitch_name is not None)), 'exactly one of `pitch_type` or `pitch_name` must be specified' pitch_val = pitch_type or pitch_name pitch_col = 'pitch_type' if pitch_type else 'pitch_name' loc = df.sort_values('name').set_index(['name', pitch_col]).loc[(player, pitch_val), ['plate_x', 'plate_z']] fig = go.Figure() if len(loc) >= min_pitches: Z = fit_pred_kde(loc.to_numpy().T, X, Y) fig.add_shape( type="rect", **coordinatify(sz_h, sz_w), line_color='gray', # fillcolor='rgba(220, 220, 220, 0.75)', #gainsboro ) fig.add_shape( type="rect", **coordinatify(h_h, h_w), line_color='dimgray', ) fig.add_trace(go.Contour( z=Z, x=kde_range, y=kde_range, colorscale=colorscale, zmin=1e-5, zmax=Z.max(), contours={ 'start': 1e-5, 'end': Z.max(), 'size': Z.max() / 6 }, showscale=False )) else: fig.add_annotation( x=0, y=0, text=f'No visualization
as less than {min_pitches} pitches thrown', showarrow=False ) fig.update_layout( xaxis=dict(range=[-plot_s/2, plot_s/2+1], showticklabels=False), yaxis=dict(range=[-plot_s/2, plot_s/2+1], scaleanchor='x', scaleratio=1, showticklabels=False), # width=384, # height=384 ) return fig # def plot_empty_pitch_map(): # fig = go.Figure() # fig.add_annotation( # x=0, # y=0, # text='No visualization
as less than 10 pitches thrown', # showarrow=False # ) # fig.update_layout( # xaxis=dict(range=[-plot_s/2, plot_s/2+1], showticklabels=False), # yaxis=dict(range=[-plot_s/2, plot_s/2+1], scaleanchor='x', scaleratio=1, showticklabels=False), # # width=384, # # height=384 # ) # return fig # velo distribution def plot_pitch_velo(player=None, velos=None, pitch_type=None, pitch_name=None, min_pitches=2): assert not ((velos is None and player is None) or (velos is not None and player is not None)), 'exactly one of `player` or `loc` must be specified' if velos is None and player is not None: assert not ((pitch_type is None and pitch_name is None) or (pitch_type is not None and pitch_name is not None)), 'exactly one of `pitch_type` or `pitch_name` must be specified' pitch_val = pitch_type or pitch_name pitch_col = 'pitch_type' if pitch_type else 'pitch_name' velos = df.set_index(['name', pitch_col]).loc[(player, pitch_val), 'release_speed'] if isinstance(velos, int): velos = [velos] fig = go.Figure() if len(velos) >= min_pitches: fig = fig.add_trace(go.Violin(x=velos, side='positive', hoveron='points', points=False, meanline_visible=True, name='Velocity Distribution')) median = velos.median() x_range = [median-25, median+25] else: fig.add_annotation( x=(170+125)/2, y=0.3/2, text=f'No visualization
as less than {min_pitches} pitches thrown', showarrow=False, ) x_range = [125, 170] fig.update_layout( xaxis=dict( title='Velocity', range=x_range, scaleratio=2 ), yaxis=dict( title='Frequency', range=[0, 0.3], scaleanchor='x', scaleratio=1, tickvals=np.linspace(0, 0.3, 3), ticktext=np.linspace(0, 0.3, 3), ), autosize=True, # width=512, # height=256, modebar_remove=['zoom', 'autoScale', 'resetScale'], ) return fig # def plot_empty_pitch_velo(): # fig = go.Figure() # fig.add_annotation( # x=(170+125)/2, # y=0.3/2, # text='No visualization
as less than 10 pitches thrown', # showarrow=False, # ) # fig.update_layout( # xaxis=dict( # title='Velocity', # range=[125, 170], # scaleratio=2 # ), # yaxis=dict( # title='Frequency', # range=[0, 0.3], # scaleanchor='x', # scaleratio=1, # # tickvals=np.linspace(0, 0.3, 3), # # ticktext=np.linspace(0, 0.3, 3), # tickvals=[0.15], # ticktext=[0.15] # ), # autosize=True, # # width=512, # # height=256, # modebar_remove=['zoom', 'autoScale', 'resetScale'], # ) # return fig def plot_all_pitch_velo(player=None, player_df=None, pitch_counts=None, min_pitches=2): # assert not ((player is None and player_df is None) or (player is not None and player_df is not None)), 'exactly one of `player` or `player_df` must be specified' if player_df is None and player is not None: assert pitch_counts is None, '`pitch_counts` must be `None` if `player_df` is None' player_df = df.sort_values('name').set_index('name').loc[player].sort_values('pitch_name').set_index('pitch_name') pitch_counts = player_df.index.value_counts(ascending=True) league_df = df.set_index('pitch_name') fig = go.Figure() velo_center = (player_df['release_speed'].min() + player_df['release_speed'].max()) / 2 for i, (pitch_name, count) in enumerate(pitch_counts.items()): velos = player_df.loc[pitch_name, 'release_speed'] league_velos = league_df.loc[pitch_name, 'release_speed'] fig.add_trace(go.Violin( x=league_velos, y=[pitch_name]*len(league_velos), line_color='gray', side='positive', orientation='h', meanline_visible=True, points=False, legendgroup='NPB', legendrank=1, # visible='legendonly', showlegend=False, name='NPB', )) if count >= min_pitches: fig.add_trace(go.Violin( x=velos, y=[pitch_name]*len(velos), side='positive', orientation='h', meanline_visible=True, points=False, legendgroup=pitch_name, legendrank=2+(len(pitch_counts) - i), name=pitch_name )) else: fig.add_trace(go.Scatter( x=[velo_center], y=[pitch_name], text=[f'No visualization as less than {min_pitches} pitches thrown'], textposition='top center', hovertext=False, mode="lines+text", legendgroup=pitch_name, legendrank=2+(len(pitch_counts) - i), name=pitch_name, )) fig.add_trace(go.Violin( x=league_df['release_speed'], y=[player]*len(league_df), line_color='gray', side='positive', orientation='h', meanline_visible=True, points=False, legendgroup='NPB', legendrank=1, # visible='legendonly', name='NPB', )) fig.add_trace(go.Violin( x=player_df['release_speed'], y=[player]*len(player_df), side='positive', orientation='h', meanline_visible=True, points=False, legendrank=0, name=player )) fig.update_xaxes(title='Velocity', range=[player_df['release_speed'].dropna().min() - 2, player_df['release_speed'].dropna().max() + 2]) fig.update_yaxes(range=[0, len(pitch_counts)+1-0.25], visible=False) fig.update_layout(violingap=0, violingroupgap=0, legend=dict(orientation='h', y=-0.15, yanchor='top')) return fig def get_data(player): player_name = f'# {player}' _df = df.sort_values('name').set_index('name').loc[player] _df.to_csv(f'files/npb.csv', index=False) _df_by_pitch_name = _df.set_index('pitch_name').sort_values('pitch_name') usage_fig = px.pie(_df['pitch_name'], names='pitch_name') usage_fig.update_traces(texttemplate='%{percent:.1%}', hovertemplate=f'{player}
' + 'threw a %{label}
%{percent:.1%} of the time (%{value} pitches)') pitch_counts = _df['pitch_name'].value_counts() pitch_velo_summary = plot_all_pitch_velo(player=player, player_df=_df_by_pitch_name, pitch_counts=pitch_counts.sort_values(ascending=True)) pitch_loc_summary = plot_pitch_map(player, all_pitches=True) pitch_groups = [] pitch_names = [] pitch_infos = [] pitch_velos = [] pitch_maps = [] for pitch_name, count in pitch_counts.items(): pitch_groups.append(gr.update(visible=True)) pitch_names.append(gr.update(value=f'### {pitch_name}', visible=True)) pitch_infos.append(gr.update( value=pd.DataFrame([{ 'Whiff%': pitch_stats.loc[(player, pitch_name), 'Whiff%'].item(), 'CSW%': pitch_stats.loc[(player, pitch_name), 'CSW%'].item() }]), visible=True )) pitch_velos.append(gr.update( value=plot_pitch_velo(velos=_df_by_pitch_name.loc[pitch_name, 'release_speed']), visible=True )) pitch_maps.append(gr.update( value=plot_pitch_map(player, pitch_name=pitch_name), label='Pitch location', visible=True )) for _ in range(max_pitch_types - len(pitch_names)): pitch_groups.append(gr.update(visible=False)) pitch_names.append(gr.update(value=None, visible=False)) pitch_infos.append(gr.update(value=None, visible=False)) for _ in range(max_pitch_types - len(pitch_maps)): pitch_velos.append(gr.update(value=None, visible=False)) pitch_maps.append(gr.update(value=None, visible=False)) velo_stats = pd.concat([pitch_stats.loc[player, 'Velocity'].rename('Avg. Velo'), league_pitch_stats['Velocity'].rename('League Avg. Velo')], join='inner', axis=1).rename_axis(['Pitch']).reset_index() return player_name, 'files/npb.csv', usage_fig, pitch_velo_summary, pitch_loc_summary, *pitch_groups, *pitch_names, *pitch_infos, *pitch_velos, *pitch_maps, velo_stats