Spaces:
Sleeping
Sleeping
import duckdb | |
import pandas as pd | |
import os | |
from typing import Callable | |
duckdb.default_connection.execute("SET GLOBAL pandas_analyze_sample=100000") | |
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/" | |
FANTASY_POSITIONS = [ | |
"QB", | |
"RB", | |
"WR", | |
"TE", | |
"FB", | |
"K", | |
] | |
def get_snap_counts(season_int: int) -> pd.DataFrame: | |
df = duckdb.sql(f"SELECT * from snap_counts_snap_counts_{season_int}").df() | |
df["fantasy_position"] = df["position"].isin(FANTASY_POSITIONS) | |
return df | |
def get_play_by_play(season_int: int) -> pd.DataFrame: | |
df = duckdb.sql(f"SELECT * from pbp_play_by_play_{season_int}").df() | |
return df | |
def get_player_stats(season_int: int) -> pd.DataFrame: | |
df = duckdb.sql("SELECT * from player_stats_player_stats").df() | |
return df | |
def get_ftn_charting(season_int: int) -> pd.DataFrame: | |
df = duckdb.sql(f"SELECT * from ftn_charting_ftn_charting_{season_int}").df() | |
return df | |
def get_pbp_participation(season_int: int) -> pd.DataFrame: | |
df = duckdb.sql( | |
f""" | |
SELECT | |
a.* | |
, b.week | |
, b.down | |
, b.qtr | |
, b.ydstogo | |
, b.play_type | |
, b.pass_length | |
, b.pass_location | |
, 1 as count_col | |
from pbp_participation_pbp_participation_{season_int} a | |
left join pbp_play_by_play_{season_int} b | |
on a.play_id = b.play_id | |
and a.nflverse_game_id = b.game_id | |
where b.week is not null | |
""" | |
).df() | |
return df | |
def get_nextgen_stats(season_int: int, stat_category: str) -> pd.DataFrame: | |
df = duckdb.sql(f"SELECT * from nextgen_stats_ngs_{stat_category} where season = {season_int}").df() | |
return df | |
SEASON = "2023" | |
NFLVERSE_ASSETS = [ | |
("ftn_charting", f"ftn_charting_{SEASON}.parquet"), | |
("espn_data", "qbr_season_level.parquet"), | |
("espn_data", "qbr_week_level.parquet"), | |
("players", "players.parquet"), | |
("pbp_participation", f"pbp_participation_{SEASON}.parquet"), | |
("snap_counts", f"snap_counts_{SEASON}.parquet"), | |
("player_stats", f"player_stats_{SEASON}.parquet"), | |
("player_stats", f"player_stats_def_{SEASON}.parquet"), | |
("player_stats", f"player_stats_kicking_{SEASON}.parquet"), | |
("pfr_advstats", "advstats_season_def.parquet"), | |
("pfr_advstats", "advstats_season_pass.parquet"), | |
("pfr_advstats", "advstats_season_rec.parquet"), | |
("pfr_advstats", "advstats_season_rush.parquet"), | |
("pfr_advstats", f"advstats_week_def_{SEASON}.parquet"), | |
("pfr_advstats", f"advstats_week_pass_{SEASON}.parquet"), | |
("pfr_advstats", f"advstats_week_rec_{SEASON}.parquet"), | |
("pfr_advstats", f"advstats_week_rush_{SEASON}.parquet"), | |
("pbp", f"play_by_play_{SEASON}.parquet"), | |
("nextgen_stats", "ngs_passing.parquet"), | |
("nextgen_stats", "ngs_receiving.parquet"), | |
("nextgen_stats", "ngs_rushing.parquet"), | |
] | |
class NflVerseDataAsset: | |
def __init__( | |
self, | |
release_tag: str, | |
asset_name: str, | |
dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x, | |
): | |
self.release_tag = release_tag | |
self.asset_name = asset_name | |
self.dataframe_mutation_fxn = dataframe_mutation_fxn | |
self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}" | |
def load_parquet_asset_to_df(self) -> pd.DataFrame: | |
location = os.path.join(BASE_URL, self.release_tag, self.asset_name) | |
df = pd.read_parquet(location) | |
return df | |
def register_asset_to_duckdb(self) -> None: | |
df = self.load_parquet_asset_to_df() | |
df = self.dataframe_mutation_fxn(df) | |
duckdb.register(self.table_name, df) | |
def load_assets(): | |
for tag, asset in NFLVERSE_ASSETS: | |
asset = NflVerseDataAsset(tag, asset) | |
asset.register_asset_to_duckdb() | |
def get_current_tables() -> list[str]: | |
current_tables_df = duckdb.sql("SHOW TABLES").df() | |
return current_tables_df["name"].tolist() | |