Jon Solow
Use left join for fantasy stats sort which was missing kickers
7e7c54c
raw
history blame
2.71 kB
import duckdb
import pandas as pd
import os
from typing import Callable
from domain.constants import SEASON
# Import-time side effect: raise DuckDB's `pandas_analyze_sample` setting on the
# default connection. NOTE(review): per the DuckDB docs this is the number of
# rows sampled when analysing pandas object columns - confirm 100000 is still
# the intended value.
duckdb.default_connection.execute("SET GLOBAL pandas_analyze_sample=100000")
# Root URL for nflverse-data release downloads; the release tag and asset file
# name are appended to it when an asset is fetched.
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"
# Position codes treated as fantasy positions. Not referenced elsewhere in the
# visible part of this file.
FANTASY_POSITIONS = [
"QB",
"RB",
"WR",
"TE",
"FB",
"K",
]
def get_weekly_rosters(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return weekly roster rows joined with each player's summed fantasy points.

    Fantasy points are summed per ``player_id`` over the season's player-stats
    table and LEFT JOINed onto the weekly roster table by ``gsis_id``, so
    roster rows without any stats are kept with a null ``fantasy_points``.

    Args:
        season_int: Season used to pick the ``..._{season}`` tables. Accepts an
            int or a numeric string.

    Returns:
        A DataFrame with the columns team, position, jersey_number, status,
        full_name, headshot_url, week, gsis_id and fantasy_points.

    Raises:
        ValueError: If ``season_int`` cannot be converted to an integer.
    """
    # Table names cannot be bound as query parameters, so the season is forced
    # to a plain integer before it is spliced into the SQL text.
    season = int(season_int)
    query = f"""
        with grouped_stats as (
            select player_id, sum(fantasy_points) as fantasy_points
            from player_stats_player_stats_{season}
            group by player_id
        )
        SELECT
            wr.team
            , wr.position
            , wr.jersey_number
            , wr.status
            , wr.full_name
            , wr.headshot_url
            , wr.week
            , wr.gsis_id
            , gs.fantasy_points
        from weekly_rosters_roster_weekly_{season} wr
        left join grouped_stats gs
            on wr.gsis_id = gs.player_id
    """
    return duckdb.sql(query).df()
def get_player_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return every row of the player-stats table for the given season.

    Args:
        season_int: Season used to pick the ``player_stats_player_stats_{season}``
            table. Accepts an int or a numeric string.

    Returns:
        The full table as a DataFrame.

    Raises:
        ValueError: If ``season_int`` cannot be converted to an integer.
    """
    # Table names cannot be bound as query parameters, so the season is forced
    # to a plain integer before it is spliced into the SQL text.
    season = int(season_int)
    return duckdb.sql(f"SELECT * from player_stats_player_stats_{season}").df()
# (release tag, asset file name) pairs. load_assets() unpacks each pair and
# passes it to NflVerseDataAsset(release_tag, asset_name).
NFLVERSE_ASSETS = [
("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
("player_stats", f"player_stats_{SEASON}.parquet"),
]
class NflVerseDataAsset:
    """One parquet file published under a release tag below ``BASE_URL``.

    The asset can be downloaded into a DataFrame and registered with DuckDB
    under ``table_name``.
    """

    def __init__(
        self,
        release_tag: str,
        asset_name: str,
        dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
    ):
        """Store the asset coordinates and derive the DuckDB table name.

        Args:
            release_tag: Release tag path segment, e.g. ``"weekly_rosters"``.
            asset_name: Asset file name, e.g. ``"roster_weekly_2023.parquet"``.
            dataframe_mutation_fxn: Applied to the downloaded DataFrame before
                registration; defaults to the identity function.
        """
        self.release_tag = release_tag
        self.asset_name = asset_name
        self.dataframe_mutation_fxn = dataframe_mutation_fxn
        # "<release_tag>_<asset file name without its last extension>"
        self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}"

    def load_parquet_asset_to_df(self) -> pd.DataFrame:
        """Download the asset from its release URL and return it as a DataFrame."""
        # URL segments are always separated by "/"; os.path.join would use the
        # OS path separator and produce a backslash on Windows.
        location = "/".join(
            (BASE_URL.rstrip("/"), self.release_tag, self.asset_name)
        )
        return pd.read_parquet(location)

    def register_asset_to_duckdb(self) -> None:
        """Load the asset, apply the mutation function and register it with DuckDB."""
        df = self.dataframe_mutation_fxn(self.load_parquet_asset_to_df())
        duckdb.register(self.table_name, df)
def load_assets():
    """Download every entry of NFLVERSE_ASSETS and register it with DuckDB."""
    for release_tag, file_name in NFLVERSE_ASSETS:
        NflVerseDataAsset(release_tag, file_name).register_asset_to_duckdb()
def get_current_tables() -> list[str]:
    """Return the ``name`` column of DuckDB's ``SHOW TABLES`` output as a list."""
    return duckdb.sql("SHOW TABLES").df()["name"].tolist()
def load_assets_if_no_tables():
    """Load the nflverse assets only when ``SHOW TABLES`` lists nothing."""
    if get_current_tables():
        # Something is already registered; skip the download.
        return
    load_assets()