import plotly.express as px
import plotly.graph_objects as go
import plotly.colors as pc
from scipy.stats import gaussian_kde
import numpy as np
# import pandas as pd
import polars as pl
import gradio as gr

from translate import max_pitch_types
from data import (
    df,
    pitch_stats, rhb_pitch_stats, lhb_pitch_stats,
    league_pitch_stats, rhb_league_pitch_stats, lhb_league_pitch_stats
)

# GRADIO FUNCTIONS

# location maps
def fit_pred_kde(data, X, Y):
    """Fit a Gaussian KDE to `data` and evaluate it on the grid `(X, Y)`.

    `data` is handed straight to `scipy.stats.gaussian_kde`, so it must be laid
    out as (n_dims, n_points). `X` and `Y` are same-shaped coordinate grids (as
    produced by `np.meshgrid`); the returned density has the shape of `X`.
    """
    kde = gaussian_kde(data)
    return kde(np.stack((X, Y)).reshape(2, -1)).reshape(*X.shape)


# Plot geometry, in plot units centred on (0, 0).
# NOTE(review): presumably `sz_*` is the strike zone and `h_*` an inner region
# of it -- confirm the naming against the data preparation code.
plot_s = 256
sz_h = 200
sz_w = 160
h_h = 200 - 40*2
h_w = 160 - 32*2

# Grid on which the location density is evaluated (1-unit resolution).
kde_range = np.arange(-plot_s/2, plot_s/2, 1)
X, Y = np.meshgrid(
    kde_range,
    kde_range
)


def coordinatify(h, w):
    """Return the corner coordinates of an `h` x `w` rectangle centred on the origin."""
    return dict(
        x0=-w/2,
        y0=-h/2,
        x1=w/2,
        y1=h/2
    )


# OrRd colorscale with a fully transparent entry at 0 so that near-zero density
# does not paint over the plot background.
colorscale = pc.sequential.OrRd
colorscale = [
    [0, 'rgba(0, 0, 0, 0)'],
] + [
    [i / len(colorscale), color] for i, color in enumerate(colorscale, start=1)
]


def plot_pitch_map(df, player=None, loc=None, pitch_type=None, pitch_name=None, all_pitches=False, min_pitches=2):
    """Draw a density contour map of pitch locations.

    Exactly one of `player` or `loc` must be given.

    Args:
        df: polars DataFrame filtered on `name` (and on `pitch_type` /
            `pitch_name`) to obtain the `plate_x` / `plate_z` columns when
            `player` is given. Unused when `loc` is given.
        player: value matched against the `name` column of `df`.
        loc: pre-selected two-column frame of locations; must expose `.to_numpy()`.
        pitch_type, pitch_name: exactly one must be given unless `all_pitches`
            is true; selects the rows of a single pitch.
        all_pitches: use every pitch of `player`; incompatible with
            `pitch_type` / `pitch_name`.
        min_pitches: minimum number of usable locations required to draw the map.

    Returns:
        A `plotly.graph_objects.Figure`. When there are too few usable
        locations, or the KDE cannot be fitted, the figure carries an
        explanatory annotation instead of the contour.

    Raises:
        AssertionError: if the argument combination is invalid.
    """
    assert (loc is None) != (player is None), 'exactly one of `player` or `loc` must be specified'

    if loc is None and player is not None:
        if all_pitches:
            assert pitch_type is None and pitch_name is None, 'cannot have `pitch_type` or `pitch_name` when `all_pitches` is `True`'
            loc = df.filter(pl.col('name') == player).select(['plate_x', 'plate_z'])
        else:
            assert (pitch_type is None) != (pitch_name is None), 'exactly one of `pitch_type` or `pitch_name` must be specified'
            # Test against None (not truthiness) so the choice agrees with the assertion above.
            if pitch_type is not None:
                pitch_col, pitch_val = 'pitch_type', pitch_type
            else:
                pitch_col, pitch_val = 'pitch_name', pitch_name
            loc = df.filter((pl.col('name') == player) & (pl.col(pitch_col) == pitch_val)).select(['plate_x', 'plate_z'])

    # Rows with a missing coordinate become NaN in numpy and would poison the
    # KDE covariance, so keep only fully finite (x, z) pairs.
    points = np.asarray(loc.to_numpy(), dtype=float)
    points = points[np.isfinite(points).all(axis=1)]

    fig = go.Figure()
    Z = None
    if len(points) >= min_pitches:
        try:
            Z = fit_pred_kde(points.T, X, Y)
        except (np.linalg.LinAlgError, ValueError):
            # gaussian_kde needs a non-singular covariance matrix; e.g. two
            # points in 2-D are always collinear and cannot be fitted.
            Z = None

    if Z is not None:
        fig.add_shape(
            type="rect",
            **coordinatify(sz_h, sz_w),
            line_color='gray',
            # fillcolor='rgba(220, 220, 220, 0.75)', #gainsboro
        )
        fig.add_shape(
            type="rect",
            **coordinatify(h_h, h_w),
            line_color='dimgray',
        )
        fig.add_trace(go.Contour(
            z=Z,
            x=kde_range,
            y=kde_range,
            colorscale=colorscale,
            zmin=1e-5,
            zmax=Z.max(),
            contours={
                'start': 1e-5,
                'end': Z.max(),
                'size': Z.max() / 5
            },
            showscale=False
        ))
    else:
        if len(points) < min_pitches:
            reason = f'less than {min_pitches} pitches thrown'
        else:
            reason = 'pitch locations are too few or too similar'
        # Plotly renders `<br>` (not a newline character) as a line break.
        fig.add_annotation(
            x=0,
            y=0,
            text=f'No visualization<br>as {reason}',
            showarrow=False
        )

    fig.update_layout(
        xaxis=dict(range=[-plot_s/2, plot_s/2+1], showticklabels=False),
        yaxis=dict(range=[-plot_s/2, plot_s/2+1], scaleanchor='x', scaleratio=1, showticklabels=False),
        # width=384,
        # height=384
    )
    return fig


# velo distribution
def plot_pitch_velo(df=None, player=None, velos=None, pitch_type=None, pitch_name=None, min_pitches=2):
    """Draw a violin plot of the velocity distribution of a single pitch.

    Exactly one of `player` or `velos` must be given.

    Args:
        df: polars DataFrame with `name`, `release_speed` and the pitch column;
            required when `player` is given.
        player: value matched against the `name` column of `df`.
        velos: pre-selected velocities (polars Series, any sequence, or a
            single number).
        pitch_type, pitch_name: exactly one must be given together with `player`.
        min_pitches: minimum number of non-null velocities required to draw
            the violin.

    Returns:
        A `plotly.graph_objects.Figure`; an annotated placeholder figure when
        there are fewer than `min_pitches` non-null velocities.

    Raises:
        AssertionError: if the argument combination is invalid.
    """
    assert (velos is None) != (player is None), 'exactly one of `player` or `velos` must be specified'

    if velos is None and player is not None:
        assert (pitch_type is None) != (pitch_name is None), 'exactly one of `pitch_type` or `pitch_name` must be specified'
        assert df is not None, '`df` must be provided if `velos` not provided'
        if pitch_type is not None:
            pitch_col, pitch_val = 'pitch_type', pitch_type
        else:
            pitch_col, pitch_val = 'pitch_name', pitch_name
        velos = df.filter((pl.col('name') == player) & (pl.col(pitch_col) == pitch_val))['release_speed']

    # Normalise every accepted input to a null-free polars Series so that
    # `len()` counts usable values and `.median()` is always available.
    if isinstance(velos, (int, float)):
        velos = [velos]
    if not isinstance(velos, pl.Series):
        velos = pl.Series('release_speed', velos)
    velos = velos.drop_nulls()

    fig = go.Figure()
    if len(velos) >= min_pitches:
        fig = fig.add_trace(go.Violin(x=velos, side='positive', hoveron='points', points=False, meanline_visible=True, name='Velocity Distribution'))
        median = velos.median()
        x_range = [median-25, median+25]
    else:
        # Plotly renders `<br>` (not a newline character) as a line break.
        fig.add_annotation(
            x=(170+125)/2,
            y=0.3/2,
            text=f'No visualization<br>as less than {min_pitches} pitches thrown',
            showarrow=False,
        )
        x_range = [125, 170]

    fig.update_layout(
        xaxis=dict(
            title='Velocity',
            range=x_range,
            scaleratio=2
        ),
        yaxis=dict(
            title='Frequency',
            range=[0, 0.3],
            scaleanchor='x',
            scaleratio=1,
            tickvals=np.linspace(0, 0.3, 3),
            ticktext=np.linspace(0, 0.3, 3),
        ),
        autosize=True,
        # width=512,
        # height=256,
        modebar_remove=['zoom', 'autoScale', 'resetScale'],
    )
    return fig


def plot_all_pitch_velo(df=None, player=None, player_df=None, pitch_counts=None, min_pitches=2):
    """Draw per-pitch velocity violins for a player on top of league-wide violins.

    Args:
        df: polars DataFrame the league-wide distributions are drawn from (rows
            with a null `release_speed` are ignored). Always required, despite
            the `None` default kept for signature compatibility.
        player: player name; used to select rows of `df` when `player_df` is
            not given, and as the label of the summary row.
        player_df: pre-filtered rows of the player; expected to contain no null
            `release_speed`.
        pitch_counts: two-column frame of (`pitch_name`, `count`) rows giving
            the drawing order; derived from `player_df` (ascending by count)
            when omitted.
        min_pitches: minimum count for a pitch to get its own violin; pitches
            below it get a text placeholder.

    Returns:
        A `plotly.graph_objects.Figure`.

    Raises:
        AssertionError: if neither `player` nor `player_df` is given, if `df`
            is missing, or if `pitch_counts` is passed without `player_df`.
    """
    assert player is not None or player_df is not None, 'at least one of `player` or `player_df` must be specified'
    # The league violins below always read from `df`, so fail early with a
    # clear message instead of an AttributeError on `None`.
    assert df is not None, '`df` must be provided to draw the league-wide distributions'

    if player_df is None:
        assert pitch_counts is None, '`pitch_counts` must be `None` if `player_df` is None'
        player_df = df.filter((pl.col('name') == player) & (pl.col('release_speed').is_not_null()))
    if pitch_counts is None:
        pitch_counts = player_df['pitch_name'].value_counts().sort('count')

    league_df = df.filter(pl.col('release_speed').is_not_null())

    # `min()` / `max()` return None for an empty selection; guard the
    # arithmetic so a player without recorded velocities does not crash.
    velo_min = player_df['release_speed'].min()
    velo_max = player_df['release_speed'].max()
    has_velos = velo_min is not None and velo_max is not None
    velo_center = (velo_min + velo_max) / 2 if has_velos else None

    fig = go.Figure()
    n_pitch_types = len(pitch_counts)
    for i, (pitch_name, count) in enumerate(pitch_counts.iter_rows()):
        velos = player_df.filter(pl.col('pitch_name') == pitch_name)['release_speed']
        league_velos = league_df.filter(pl.col('pitch_name') == pitch_name)['release_speed']

        # League-wide distribution of this pitch, drawn in gray behind the player's.
        fig.add_trace(go.Violin(
            x=league_velos,
            y=[pitch_name]*len(league_velos),
            line_color='gray',
            side='positive',
            orientation='h',
            meanline_visible=True,
            points=False,
            legendgroup='NPB',
            legendrank=1,
            # visible='legendonly',
            showlegend=False,
            name='NPB',
        ))

        if count >= min_pitches:
            fig.add_trace(go.Violin(
                x=velos,
                y=[pitch_name]*len(velos),
                side='positive',
                orientation='h',
                meanline_visible=True,
                points=False,
                legendgroup=pitch_name,
                legendrank=2+(n_pitch_types - i),
                name=pitch_name
            ))
        else:
            fig.add_trace(go.Scatter(
                x=[velo_center],
                y=[pitch_name],
                text=[f'No visualization as less than {min_pitches} pitches thrown'],
                textposition='top center',
                hovertext=False,
                mode="lines+text",
                legendgroup=pitch_name,
                legendrank=2+(n_pitch_types - i),
                name=pitch_name,
            ))

    # Summary row: all pitches of the league and of the player.
    fig.add_trace(go.Violin(
        x=league_df['release_speed'],
        y=[player]*len(league_df),
        line_color='gray',
        side='positive',
        orientation='h',
        meanline_visible=True,
        points=False,
        legendgroup='NPB',
        legendrank=1,
        # visible='legendonly',
        name='NPB',
    ))
    fig.add_trace(go.Violin(
        x=player_df['release_speed'],
        y=[player]*len(player_df),
        side='positive',
        orientation='h',
        meanline_visible=True,
        points=False,
        legendrank=0,
        name=player
    ))

    fig.update_xaxes(title='Velocity')
    if has_velos:
        fig.update_xaxes(range=[velo_min - 2, velo_max + 2])
    fig.update_yaxes(range=[0, n_pitch_types+1-0.25], visible=False)
    fig.update_layout(violingap=0, violingroupgap=0, legend=dict(orientation='h', y=-0.15, yanchor='top'))

    return fig


def get_data(player, handedness):
    """Build every Gradio output value for `player`.

    Args:
        player: value matched against the `name` column of the module-level `df`.
        handedness: `'Right'` restricts the data to rows with `stand == 'R'`,
            `'Left'` to `stand == 'L'`; any other value uses all rows. The
            matching split of the pitch-stat tables is used accordingly.

    Returns:
        A flat tuple: player heading, handedness update, CSV path, usage pie
        chart, velocity summary figure, location summary figure, then
        `max_pitch_types` updates each for pitch groups, names, infos, velocity
        plots and location maps (hidden placeholders pad unused slots), and
        finally the velocity comparison table.
    """
    player_name = f'# {player}'

    _df = df.filter(pl.col('name') == player)
    league_df = df
    _pitch_stats = pitch_stats
    _league_pitch_stats = league_pitch_stats

    # handedness label -> (value of `stand`, player stats split, league stats split)
    splits = {
        'Right': ('R', rhb_pitch_stats, rhb_league_pitch_stats),
        'Left': ('L', lhb_pitch_stats, lhb_league_pitch_stats),
    }
    if handedness in splits:
        stand, _pitch_stats, _league_pitch_stats = splits[handedness]
        _df = _df.filter(pl.col('stand') == stand)
        league_df = league_df.filter(pl.col('stand') == stand)

    handedness = gr.update(value=handedness, interactive=True)

    usage_fig = px.pie(_df.select('pitch_name'), names='pitch_name')
    # Plotly renders `<br>` (not a newline character) as a line break.
    usage_fig.update_traces(
        texttemplate='%{percent:.1%}',
        hovertemplate=f'{player}<br>' + 'threw a %{label}<br>%{percent:.1%} of the time (%{value} pitches)'
    )

    # Most-thrown pitch first; this fixes the order of the per-pitch sections.
    pitch_counts = _df['pitch_name'].value_counts().sort('count', descending=True)

    pitch_velo_summary = plot_all_pitch_velo(
        df=league_df,
        player=player,
        player_df=_df.filter(pl.col('release_speed').is_not_null()),
        pitch_counts=pitch_counts.sort('count', descending=False)
    )
    pitch_loc_summary = plot_pitch_map(df=_df, player=player, all_pitches=True)

    pitch_groups = []
    pitch_names = []
    pitch_infos = []
    pitch_velos = []
    pitch_maps = []

    for pitch_name, count in pitch_counts.iter_rows():
        pitch_groups.append(gr.update(visible=True))
        pitch_names.append(gr.update(value=f'### {pitch_name}', visible=True))
        pitch_infos.append(gr.update(
            value=_pitch_stats.filter((pl.col('name') == player) & (pl.col('pitch_name') == pitch_name)).select(['Whiff%', 'CSW%']),
            visible=True
        ))
        pitch_velos.append(gr.update(
            value=plot_pitch_velo(velos=_df.filter(pl.col('pitch_name') == pitch_name)['release_speed']),
            visible=True
        ))
        pitch_maps.append(gr.update(
            value=plot_pitch_map(df=_df, player=player, pitch_name=pitch_name),
            label='Pitch location',
            visible=True
        ))

    # Pad every component list up to `max_pitch_types` with hidden placeholders.
    # All five lists have the same length here, so one loop covers them all.
    for _ in range(max_pitch_types - len(pitch_names)):
        pitch_groups.append(gr.update(visible=False))
        pitch_names.append(gr.update(value=None, visible=False))
        pitch_infos.append(gr.update(value=None, visible=False))
        pitch_velos.append(gr.update(value=None, visible=False))
        pitch_maps.append(gr.update(value=None, visible=False))

    velo_stats = (
        _pitch_stats
        .filter(pl.col('name') == player)
        .select(pl.col('pitch_name').alias('Pitch'), pl.col('Velocity').alias('Avg. Velo'), pl.col('Count'))
        .join(
            _league_pitch_stats.select(pl.col('pitch_name').alias('Pitch'), pl.col('Velocity').alias('League Avg. Velo')),
            on='Pitch',
            how='inner'
        )
        .sort('Count', descending=True)
        .drop('Count')
    )

    # NOTE(review): 'files/npb.csv' is returned but not written by this
    # function -- confirm the file is produced elsewhere.
    return player_name, handedness, 'files/npb.csv', usage_fig, pitch_velo_summary, pitch_loc_summary, *pitch_groups, *pitch_names, *pitch_infos, *pitch_velos, *pitch_maps, velo_stats