Jon Solow
Add next gen stats page
4569c98
raw
history blame
4.17 kB
import duckdb
import pandas as pd
import os
from typing import Callable
duckdb.default_connection.execute("SET GLOBAL pandas_analyze_sample=100000")
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"
FANTASY_POSITIONS = [
"QB",
"RB",
"WR",
"TE",
"FB",
"K",
]
def get_snap_counts(season_int: int) -> pd.DataFrame:
df = duckdb.sql(f"SELECT * from snap_counts_snap_counts_{season_int}").df()
df["fantasy_position"] = df["position"].isin(FANTASY_POSITIONS)
return df
def get_play_by_play(season_int: int) -> pd.DataFrame:
df = duckdb.sql(f"SELECT * from pbp_play_by_play_{season_int}").df()
return df
def get_player_stats(season_int: int) -> pd.DataFrame:
df = duckdb.sql("SELECT * from player_stats_player_stats").df()
return df
def get_ftn_charting(season_int: int) -> pd.DataFrame:
df = duckdb.sql(f"SELECT * from ftn_charting_ftn_charting_{season_int}").df()
return df
def get_pbp_participation(season_int: int) -> pd.DataFrame:
df = duckdb.sql(
f"""
SELECT
a.*
, b.week
, b.down
, b.qtr
, b.ydstogo
, b.play_type
, b.pass_length
, b.pass_location
, 1 as count_col
from pbp_participation_pbp_participation_{season_int} a
left join pbp_play_by_play_{season_int} b
on a.play_id = b.play_id
and a.nflverse_game_id = b.game_id
where b.week is not null
"""
).df()
return df
def get_nextgen_stats(season_int: int, stat_category: str) -> pd.DataFrame:
df = duckdb.sql(f"SELECT * from nextgen_stats_ngs_{stat_category} where season = {season_int}").df()
return df
SEASON = "2023"
NFLVERSE_ASSETS = [
("ftn_charting", f"ftn_charting_{SEASON}.parquet"),
("espn_data", "qbr_season_level.parquet"),
("espn_data", "qbr_week_level.parquet"),
("players", "players.parquet"),
("pbp_participation", f"pbp_participation_{SEASON}.parquet"),
("snap_counts", f"snap_counts_{SEASON}.parquet"),
("player_stats", f"player_stats_{SEASON}.parquet"),
("player_stats", f"player_stats_def_{SEASON}.parquet"),
("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
("pfr_advstats", "advstats_season_def.parquet"),
("pfr_advstats", "advstats_season_pass.parquet"),
("pfr_advstats", "advstats_season_rec.parquet"),
("pfr_advstats", "advstats_season_rush.parquet"),
("pfr_advstats", f"advstats_week_def_{SEASON}.parquet"),
("pfr_advstats", f"advstats_week_pass_{SEASON}.parquet"),
("pfr_advstats", f"advstats_week_rec_{SEASON}.parquet"),
("pfr_advstats", f"advstats_week_rush_{SEASON}.parquet"),
("pbp", f"play_by_play_{SEASON}.parquet"),
("nextgen_stats", "ngs_passing.parquet"),
("nextgen_stats", "ngs_receiving.parquet"),
("nextgen_stats", "ngs_rushing.parquet"),
]
class NflVerseDataAsset:
def __init__(
self,
release_tag: str,
asset_name: str,
dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
):
self.release_tag = release_tag
self.asset_name = asset_name
self.dataframe_mutation_fxn = dataframe_mutation_fxn
self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}"
def load_parquet_asset_to_df(self) -> pd.DataFrame:
location = os.path.join(BASE_URL, self.release_tag, self.asset_name)
df = pd.read_parquet(location)
return df
def register_asset_to_duckdb(self) -> None:
df = self.load_parquet_asset_to_df()
df = self.dataframe_mutation_fxn(df)
duckdb.register(self.table_name, df)
def load_assets():
for tag, asset in NFLVERSE_ASSETS:
asset = NflVerseDataAsset(tag, asset)
asset.register_asset_to_duckdb()
def get_current_tables() -> list[str]:
current_tables_df = duckdb.sql("SHOW TABLES").df()
return current_tables_df["name"].tolist()