"""Load nflverse release assets into DuckDB and query them as DataFrames."""

import os
from typing import Callable

import duckdb
import pandas as pd

from domain.constants import SEASON

# Applied to DuckDB's default connection, the same one used by the
# module-level duckdb.sql / duckdb.register calls below. The module-level
# duckdb.execute helper is used so this does not depend on how a given
# DuckDB release exposes `default_connection`.
duckdb.execute("SET GLOBAL pandas_analyze_sample=100000")

# Root of the nflverse-data GitHub release downloads; a release tag and an
# asset file name are appended to form the full asset URL.
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"

FANTASY_POSITIONS = [
    "QB",
    "RB",
    "WR",
    "TE",
    "FB",
    "K",
]


def get_weekly_rosters(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return weekly roster rows joined with each player's summed fantasy points.

    Fantasy points are summed per ``player_id`` over the season's player-stats
    table and left-joined onto the weekly roster table via ``gsis_id``, so
    roster rows with no matching stats keep a null ``fantasy_points``.

    Args:
        season_int: Season suffix of the registered tables to query. It is
            interpolated directly into the table names, so it must come from
            a trusted source.

    Returns:
        A DataFrame with the columns selected in the query below.

    Note:
        The queried tables must already be registered with DuckDB.
        ``load_assets`` only registers the tables for ``SEASON``.
    """
    query = f"""
        with grouped_stats as (
            select player_id, sum(fantasy_points) as fantasy_points
            from player_stats_player_stats_{season_int}
            group by player_id
        )
        SELECT team
            , wr.position
            , wr.jersey_number
            , wr.status
            , wr.full_name
            , wr.headshot_url
            , wr.week
            , wr.gsis_id
            , gs.fantasy_points
        from weekly_rosters_roster_weekly_{season_int} wr
        left join grouped_stats gs on wr.gsis_id = gs.player_id
        """
    return duckdb.sql(query).df()


def get_player_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return every row of the registered player-stats table for a season.

    Args:
        season_int: Season suffix of the registered table to query. It is
            interpolated directly into the table name, so it must come from
            a trusted source.
    """
    return duckdb.sql(f"SELECT * from player_stats_player_stats_{season_int}").df()


# (release tag, asset file name) pairs registered by load_assets().
NFLVERSE_ASSETS = [
    ("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
    ("player_stats", f"player_stats_{SEASON}.parquet"),
]


class NflVerseDataAsset:
    """A single parquet asset from an nflverse-data release.

    The DuckDB table name is ``<release_tag>_<asset file name without its
    final extension>``, e.g. ``player_stats_player_stats_<season>``.
    """

    def __init__(
        self,
        release_tag: str,
        asset_name: str,
        dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
    ):
        """Store the asset coordinates and derive its DuckDB table name.

        Args:
            release_tag: Release tag path segment under ``BASE_URL``.
            asset_name: File name of the asset inside that release.
            dataframe_mutation_fxn: Transformation applied to the loaded
                DataFrame before registration; defaults to the identity.
        """
        self.release_tag = release_tag
        self.asset_name = asset_name
        self.dataframe_mutation_fxn = dataframe_mutation_fxn
        # Drop only the last extension so dotted stems are preserved.
        self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}"

    def load_parquet_asset_to_df(self) -> pd.DataFrame:
        """Read the asset's parquet file from its release URL into a DataFrame."""
        # URLs always use "/" as the separator; os.path.join would insert the
        # platform separator ("\\" on Windows) and yield an invalid URL.
        location = f"{BASE_URL.rstrip('/')}/{self.release_tag}/{self.asset_name}"
        return pd.read_parquet(location)

    def register_asset_to_duckdb(self) -> None:
        """Load the asset, apply the mutation function, and register the result.

        The DataFrame is registered on DuckDB's default connection under
        ``self.table_name``.
        """
        df = self.dataframe_mutation_fxn(self.load_parquet_asset_to_df())
        duckdb.register(self.table_name, df)


def load_assets() -> None:
    """Load and register every asset listed in ``NFLVERSE_ASSETS``."""
    for release_tag, asset_name in NFLVERSE_ASSETS:
        NflVerseDataAsset(release_tag, asset_name).register_asset_to_duckdb()


def get_current_tables() -> list[str]:
    """Return the names reported by ``SHOW TABLES`` on the default connection."""
    current_tables_df = duckdb.sql("SHOW TABLES").df()
    return current_tables_df["name"].tolist()


def load_assets_if_no_tables() -> None:
    """Register the nflverse assets only when DuckDB reports no tables yet."""
    if not get_current_tables():
        load_assets()