import os
import posixpath
from typing import Callable

import duckdb
import pandas as pd
import streamlit as st

from domain.constants import SEASON


# Raise the number of rows DuckDB samples when it analyzes pandas object
# columns, so type inference on the registered DataFrames looks at up to
# 100000 rows. Runs once at import time against the default connection.
duckdb.default_connection.execute("SET GLOBAL pandas_analyze_sample=100000")

# Root of the nflverse-data GitHub release downloads. A release tag and an
# asset file name are appended to it to form the location of one parquet file.
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"


# Position abbreviations of interest for fantasy scoring.
# NOTE(review): not referenced in this part of the file -- presumably used by
# callers to filter roster rows; confirm against usages.
FANTASY_POSITIONS = [
    "QB",
    "RB",
    "WR",
    "TE",
    "FB",
    "K",
]


def get_weekly_rosters(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return weekly roster rows with each player's summed fantasy points.

    Queries the DuckDB relations ``weekly_rosters_roster_weekly_<season>`` and
    ``player_stats_player_stats_<season>``; both must already be registered
    on the default connection.

    Args:
        season_int: Season suffix of the relation names to query.

    Returns:
        One row per roster row with the columns ``team``, ``position``,
        ``jersey_number``, ``status``, ``full_name``, ``headshot_url``,
        ``week``, ``gsis_id`` and ``fantasy_points``. ``fantasy_points`` is
        the sum over all of the player's rows in the stats relation, and is
        missing for roster rows whose ``gsis_id`` has no matching
        ``player_id`` (left join).
    """
    # The season is part of the relation name, which cannot be bound as a
    # query parameter, so it is interpolated. Only pass trusted values.
    query = f"""
        with grouped_stats as (
            select player_id, sum(fantasy_points) as fantasy_points
            from player_stats_player_stats_{season_int}
            group by player_id
        )
        SELECT
            wr.team
            , wr.position
            , wr.jersey_number
            , wr.status
            , wr.full_name
            , wr.headshot_url
            , wr.week
            , wr.gsis_id
            , gs.fantasy_points
        from weekly_rosters_roster_weekly_{season_int} wr
        left join grouped_stats gs
            on wr.gsis_id = gs.player_id
    """
    return duckdb.sql(query).df()


def get_player_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return the per-player, per-week offensive stat columns for a season.

    Queries the DuckDB relation ``player_stats_player_stats_<season>``, which
    must already be registered on the default connection.

    Args:
        season_int: Season suffix of the relation name to query.

    Returns:
        A DataFrame with ``player_id``, ``team`` (the relation's
        ``recent_team`` column, renamed), ``week`` and the passing, rushing,
        receiving and special-teams columns listed in the query, unmodified.
    """
    # The season is part of the relation name, which cannot be bound as a
    # query parameter, so it is interpolated. Only pass trusted values.
    query = f"""
        SELECT
            player_id
            , recent_team as team
            , week
            , passing_tds
            , passing_yards
            , passing_2pt_conversions
            , sack_fumbles_lost
            , interceptions
            , rushing_tds
            , rushing_yards
            , rushing_2pt_conversions
            , rushing_fumbles_lost
            , receptions
            , receiving_tds
            , receiving_yards
            , receiving_2pt_conversions
            , receiving_fumbles_lost
            , special_teams_tds
        from player_stats_player_stats_{season_int}
    """
    return duckdb.sql(query).df()


def get_player_kicking_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return the per-player, per-week kicking stat columns for a season.

    Queries the DuckDB relation ``player_stats_player_stats_kicking_<season>``,
    which must already be registered on the default connection.

    Args:
        season_int: Season suffix of the relation name to query.

    Returns:
        A DataFrame with ``player_id``, ``week``, ``pat_made`` and the
        ``fg_made_*`` distance-bucket columns. Missing values in every
        returned column are replaced with ``0.0``.
    """
    # The season is part of the relation name, which cannot be bound as a
    # query parameter, so it is interpolated. Only pass trusted values.
    query = f"""
        SELECT
            player_id
            , week
            , pat_made
            , fg_made_0_19
            , fg_made_20_29
            , fg_made_30_39
            , fg_made_40_49
            , fg_made_50_59
            , fg_made_60_
        from player_stats_player_stats_kicking_{season_int}
    """
    kicking = duckdb.sql(query).df()
    # fillna applies to the whole frame, not just the stat columns.
    return kicking.fillna(0.0)


def get_team_defense_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return defensive stats summed per week and team for a season.

    Queries the DuckDB relation ``player_stats_player_stats_def_<season>``,
    which must already be registered on the default connection.

    Args:
        season_int: Season suffix of the relation name to query.

    Returns:
        One row per ``(week, team)`` with the summed columns ``def_sacks``,
        ``def_interceptions``, ``def_tds``, ``def_fumble_recovery_opp`` and
        ``def_safety``.
    """
    # The season is part of the relation name, which cannot be bound as a
    # query parameter, so it is interpolated. Only pass trusted values.
    query = f"""
        select
            week
            , team
            , sum(def_sacks) as def_sacks
            , sum(def_interceptions) as def_interceptions
            , sum(def_tds) as def_tds
            , sum(def_fumble_recovery_opp) as def_fumble_recovery_opp
            , sum(def_safety) as def_safety
        from player_stats_player_stats_def_{season_int}
        group by week, team
    """
    return duckdb.sql(query).df()


# (release tag, asset file name) pairs to download and register in DuckDB.
# Each pair is registered under "<release tag>_<file name without extension>",
# which is the relation name the query functions in this module select from.
NFLVERSE_ASSETS = [
    ("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
    ("player_stats", f"player_stats_{SEASON}.parquet"),
    ("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
    ("player_stats", f"player_stats_def_{SEASON}.parquet"),
]


class NflVerseDataAsset:
    """One parquet asset of an nflverse-data release, registrable in DuckDB.

    The asset is read from ``BASE_URL/<release_tag>/<asset_name>`` and
    registered on DuckDB's default connection under ``table_name``.
    """

    def __init__(
        self,
        release_tag: str,
        asset_name: str,
        dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
    ):
        """Store the asset coordinates and derive its DuckDB relation name.

        Args:
            release_tag: Release tag (path segment) the asset belongs to.
            asset_name: File name of the asset, including its extension.
            dataframe_mutation_fxn: Applied to the loaded DataFrame before it
                is registered; defaults to the identity function.
        """
        self.release_tag = release_tag
        self.asset_name = asset_name
        self.dataframe_mutation_fxn = dataframe_mutation_fxn
        # "<tag>_<file name minus its last extension>", e.g.
        # ("player_stats", "player_stats_2023.parquet")
        # -> "player_stats_player_stats_2023".
        self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}"

    @staticmethod
    def _join_url(*parts: str) -> str:
        """Join URL segments with forward slashes on every platform.

        ``os.path.join`` uses the host OS separator, so on Windows it would
        put a backslash between segments and produce an invalid URL.
        ``posixpath.join`` gives the same result as ``os.path.join`` does on
        POSIX systems, everywhere.
        """
        return posixpath.join(*parts)

    def load_parquet_asset_to_df(self) -> pd.DataFrame:
        """Read the asset's parquet file from its release URL.

        Returns:
            The asset's contents as returned by ``pandas.read_parquet``.
        """
        location = self._join_url(BASE_URL, self.release_tag, self.asset_name)
        return pd.read_parquet(location)

    def register_asset_to_duckdb(self) -> None:
        """Load the asset, apply the mutation function and register it.

        The resulting DataFrame is registered with ``duckdb.register`` under
        ``self.table_name``.
        """
        loaded = self.load_parquet_asset_to_df()
        prepared = self.dataframe_mutation_fxn(loaded)
        duckdb.register(self.table_name, prepared)


@st.cache_data(ttl=60 * 60 * 24)
def load_assets() -> None:
    """Download every asset in ``NFLVERSE_ASSETS`` and register it in DuckDB.

    Wrapped in ``st.cache_data`` with a TTL of 24 hours (60 * 60 * 24
    seconds), so Streamlit re-runs the body at most once per TTL window and
    otherwise returns the cached result.
    """
    for release_tag, asset_name in NFLVERSE_ASSETS:
        NflVerseDataAsset(release_tag, asset_name).register_asset_to_duckdb()


def get_current_tables() -> list[str]:
    """Return the names reported by ``SHOW TABLES`` on the default connection.

    Returns:
        The values of the ``name`` column of the ``SHOW TABLES`` result.
    """
    return duckdb.sql("SHOW TABLES").df()["name"].tolist()
|