import duckdb import pandas as pd import os import streamlit as st from typing import Callable from domain.constants import SEASON duckdb.default_connection.execute("SET GLOBAL pandas_analyze_sample=100000") BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/" FANTASY_POSITIONS = [ "QB", "RB", "WR", "TE", "FB", "K", ] def get_weekly_rosters(season_int: int | str = SEASON) -> pd.DataFrame: df = duckdb.sql( f""" with grouped_stats as ( select player_id, sum(fantasy_points) as fantasy_points from player_stats_player_stats_{season_int} group by player_id ) SELECT team , wr.position , wr.jersey_number , wr.status , wr.full_name , wr.headshot_url , wr.week , wr.gsis_id , gs.fantasy_points from weekly_rosters_roster_weekly_{season_int} wr left join grouped_stats gs on wr.gsis_id = gs.player_id """ ).df() return df def get_player_stats(season_int: int | str = SEASON) -> pd.DataFrame: df = duckdb.sql( f"""SELECT player_id , recent_team as team , week , passing_tds , passing_yards , passing_2pt_conversions , sack_fumbles_lost , interceptions , rushing_tds , rushing_yards , rushing_2pt_conversions , rushing_fumbles_lost , receptions , receiving_tds , receiving_yards , receiving_2pt_conversions , receiving_fumbles_lost , special_teams_tds from player_stats_player_stats_{season_int}""" ).df() return df def get_player_kicking_stats(season_int: int | str = SEASON) -> pd.DataFrame: df = duckdb.sql( f"""SELECT player_id , week , pat_made , fg_made_0_19 , fg_made_20_29 , fg_made_30_39 , fg_made_40_49 , fg_made_50_59 , fg_made_60_ from player_stats_player_stats_kicking_{season_int}""" ).df() return df.fillna(0.0) def get_team_defense_stats(season_int: int | str = SEASON) -> pd.DataFrame: df = duckdb.sql( f""" select week , team , sum(def_sacks) as def_sacks , sum(def_interceptions) as def_interceptions , sum(def_tds) as def_tds , sum(def_fumble_recovery_opp) as def_fumble_recovery_opp , sum(def_safety) as def_safety from player_stats_player_stats_def_{season_int} group by week, team """ ).df() return df NFLVERSE_ASSETS = [ ("weekly_rosters", f"roster_weekly_{SEASON}.parquet"), ("player_stats", f"player_stats_{SEASON}.parquet"), ("player_stats", f"player_stats_kicking_{SEASON}.parquet"), ("player_stats", f"player_stats_def_{SEASON}.parquet"), ] class NflVerseDataAsset: def __init__( self, release_tag: str, asset_name: str, dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x, ): self.release_tag = release_tag self.asset_name = asset_name self.dataframe_mutation_fxn = dataframe_mutation_fxn self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}" def load_parquet_asset_to_df(self) -> pd.DataFrame: location = os.path.join(BASE_URL, self.release_tag, self.asset_name) df = pd.read_parquet(location) return df def register_asset_to_duckdb(self) -> None: df = self.load_parquet_asset_to_df() df = self.dataframe_mutation_fxn(df) duckdb.register(self.table_name, df) @st.cache_data(ttl=60 * 60 * 24) def load_assets(): for tag, asset in NFLVERSE_ASSETS: asset = NflVerseDataAsset(tag, asset) asset.register_asset_to_duckdb() def get_current_tables() -> list[str]: current_tables_df = duckdb.sql("SHOW TABLES").df() return current_tables_df["name"].tolist()