|
import duckdb |
|
import pandas as pd |
|
import os |
|
from typing import Callable |
|
|
|
from domain.constants import SEASON |
|
|
|
|
|
# Raise the number of rows DuckDB samples when it analyzes pandas columns
# (setting name and value are unchanged from the original statement).
# The module-level ``duckdb.execute`` runs against DuckDB's default connection,
# so this does not depend on whether ``duckdb.default_connection`` is exposed
# as an attribute or as a callable.
# NOTE(review): confirm against the DuckDB version pinned by this project.
duckdb.execute("SET GLOBAL pandas_analyze_sample=100000")
|
|
|
# Root of the nflverse-data release downloads; a release tag and an asset file
# name are appended to it to form the full download URL.
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"


# Position abbreviations of interest for fantasy purposes.
# NOTE(review): not referenced in this part of the file; presumably consumed by
# callers that filter rosters by position -- confirm.
FANTASY_POSITIONS = ["QB", "RB", "WR", "TE", "FB", "K"]
|
|
|
|
|
def get_weekly_rosters(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return weekly roster rows for a season, each with a fantasy-point total.

    Every row of ``weekly_rosters_roster_weekly_<season>`` is left-joined, on
    ``gsis_id = player_id``, to the per-player sum of ``fantasy_points`` from
    ``player_stats_player_stats_<season>``. Because the sum is grouped by
    player only, the same total appears on each roster row of that player,
    and roster rows without matching stats get a missing value.

    Both tables must already be visible to DuckDB's default connection.

    Args:
        season_int: Season used as the suffix of both table names.

    Returns:
        The query result as a pandas DataFrame.
    """
    # The season is interpolated because it is part of the table identifiers,
    # which cannot be supplied as bound query parameters.
    roster_query = f"""
        with grouped_stats as (
            select player_id, sum(fantasy_points) as fantasy_points
            from player_stats_player_stats_{season_int}
            group by player_id
        )
        SELECT
            team
            , wr.position
            , wr.jersey_number
            , wr.status
            , wr.full_name
            , wr.headshot_url
            , wr.week
            , wr.gsis_id
            , gs.fantasy_points
        from weekly_rosters_roster_weekly_{season_int} wr
        left join grouped_stats gs
            on wr.gsis_id = gs.player_id
    """
    return duckdb.sql(roster_query).df()
|
|
|
|
|
def get_player_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return every row and column of the season's player-stats table.

    Reads ``player_stats_player_stats_<season>`` through DuckDB's default
    connection; the table must already be registered there.

    Args:
        season_int: Season used as the suffix of the table name.

    Returns:
        The full table as a pandas DataFrame.
    """
    stats_query = f"SELECT * from player_stats_player_stats_{season_int}"
    return duckdb.sql(stats_query).df()
|
|
|
|
|
# (release tag, parquet file name) pairs to fetch for the configured SEASON.
# Each file name is "<stem>_<SEASON>.parquet".
NFLVERSE_ASSETS = [
    (release_tag, f"{file_stem}_{SEASON}.parquet")
    for release_tag, file_stem in (
        ("weekly_rosters", "roster_weekly"),
        ("player_stats", "player_stats"),
    )
]
|
|
|
|
|
class NflVerseDataAsset:
    """One parquet file published under a release tag below ``BASE_URL``.

    The asset can be downloaded into a pandas DataFrame, optionally
    transformed, and registered with DuckDB under ``table_name``.

    Attributes are set in ``__init__``:
        release_tag: Release tag segment of the download URL.
        asset_name: File name of the asset, including its extension.
        dataframe_mutation_fxn: Callable applied to the DataFrame before it is
            registered; the default returns its argument unchanged.
        table_name: ``"<release_tag>_<asset_name without its last extension>"``.
    """

    def __init__(
        self,
        release_tag: str,
        asset_name: str,
        dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
    ):
        """Store the asset coordinates and derive the DuckDB table name."""
        self.release_tag = release_tag
        self.asset_name = asset_name
        self.dataframe_mutation_fxn = dataframe_mutation_fxn
        # Only the final ".ext" is removed, so dots earlier in the file name
        # are kept in the table name.
        self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}"

    @staticmethod
    def _join_url(*parts: str) -> str:
        """Join URL segments with exactly one forward slash between them.

        Leading/trailing slashes on each segment are dropped and empty
        segments are skipped, so no doubled or missing slashes are produced.
        """
        trimmed = (part.strip("/") for part in parts)
        return "/".join(segment for segment in trimmed if segment)

    def load_parquet_asset_to_df(self) -> pd.DataFrame:
        """Read the asset's parquet file from its download URL.

        Returns:
            The parquet contents as a pandas DataFrame.
        """
        # URLs always use "/"; os.path.join would use the platform separator
        # (a backslash on Windows) and produce an invalid URL there.
        location = self._join_url(BASE_URL, self.release_tag, self.asset_name)
        return pd.read_parquet(location)

    def register_asset_to_duckdb(self) -> None:
        """Download the asset, apply the mutation and register it in DuckDB.

        The resulting DataFrame is registered under ``self.table_name``.
        """
        df = self.dataframe_mutation_fxn(self.load_parquet_asset_to_df())
        duckdb.register(self.table_name, df)
|
|
|
|
|
def load_assets():
    """Register every entry of ``NFLVERSE_ASSETS`` with DuckDB.

    For each ``(release tag, file name)`` pair an ``NflVerseDataAsset`` is
    built and its ``register_asset_to_duckdb`` method is called.
    """
    for release_tag, file_name in NFLVERSE_ASSETS:
        NflVerseDataAsset(release_tag, file_name).register_asset_to_duckdb()
|
|
|
|
|
def get_current_tables() -> list[str]:
    """Return the ``name`` column of DuckDB's ``SHOW TABLES`` output as a list."""
    return duckdb.sql("SHOW TABLES").df()["name"].tolist()
|
|
|
|
|
def load_assets_if_no_tables():
    """Call ``load_assets`` only when ``get_current_tables`` returns nothing.

    If at least one table name is reported, no assets are loaded.
    """
    # An empty list is falsy, so no explicit len() check is needed.
    if not get_current_tables():
        load_assets()
|
|