Jon Solow
Fill 0 in for kicker stats to replace nans
60a62f4
import duckdb
import pandas as pd
import os
import streamlit as st
from typing import Callable
from domain.constants import SEASON
# Raise the number of rows DuckDB samples when it analyzes pandas object
# columns. The statement is issued through the module-level ``duckdb.execute``
# so that it runs on the same default connection that the ``duckdb.sql`` and
# ``duckdb.register`` calls in this module use, without depending on
# ``duckdb.default_connection`` being a plain attribute.
# NOTE(review): some DuckDB releases appear to expose ``default_connection``
# as a callable rather than an attribute - confirm against the pinned version.
duckdb.execute("SET GLOBAL pandas_analyze_sample=100000")

# Download prefix for nflverse-data GitHub releases; a release tag and an
# asset file name are appended to it when an asset is fetched.
BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"

# Position abbreviations considered fantasy-relevant.
# NOTE(review): not referenced in the visible part of this module; presumably
# consumed by importers - verify before changing.
FANTASY_POSITIONS = [
    "QB",
    "RB",
    "WR",
    "TE",
    "FB",
    "K",
]
def get_weekly_rosters(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return weekly roster rows joined with each player's summed fantasy points.

    Reads the DuckDB relations ``weekly_rosters_roster_weekly_<season>`` and
    ``player_stats_player_stats_<season>``; both must already be registered
    on the default DuckDB connection.

    Args:
        season_int: Season suffix interpolated into the table names.

    Returns:
        One row per roster row with team, position, jersey number, status,
        full name, headshot URL, week, gsis_id and ``fantasy_points``.
        ``fantasy_points`` is the sum over all of the player's stat rows and
        is null when the left join finds no stats for the player.
    """
    # The SQL text is kept flush-left so the string sent to DuckDB is unchanged.
    roster_query = f"""
with grouped_stats as (
select player_id, sum(fantasy_points) as fantasy_points
from player_stats_player_stats_{season_int}
group by player_id
)
SELECT
team
, wr.position
, wr.jersey_number
, wr.status
, wr.full_name
, wr.headshot_url
, wr.week
, wr.gsis_id
, gs.fantasy_points
from weekly_rosters_roster_weekly_{season_int} wr
left join grouped_stats gs
on wr.gsis_id = gs.player_id
"""
    return duckdb.sql(roster_query).df()
def get_player_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return per-player, per-week offensive stat columns for a season.

    Reads the DuckDB relation ``player_stats_player_stats_<season>``, which
    must already be registered on the default DuckDB connection.

    Args:
        season_int: Season suffix interpolated into the table name.

    Returns:
        A DataFrame with ``player_id``, ``team`` (aliased from
        ``recent_team``), ``week`` and the passing, rushing, receiving and
        special-teams columns named in the query.
    """
    # The SQL text is kept flush-left so the string sent to DuckDB is unchanged.
    stats_query = f"""SELECT
player_id
, recent_team as team
, week
, passing_tds
, passing_yards
, passing_2pt_conversions
, sack_fumbles_lost
, interceptions
, rushing_tds
, rushing_yards
, rushing_2pt_conversions
, rushing_fumbles_lost
, receptions
, receiving_tds
, receiving_yards
, receiving_2pt_conversions
, receiving_fumbles_lost
, special_teams_tds
from player_stats_player_stats_{season_int}"""
    return duckdb.sql(stats_query).df()
def get_player_kicking_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return per-player, per-week kicking stats with missing values zeroed.

    Reads the DuckDB relation ``player_stats_player_stats_kicking_<season>``,
    which must already be registered on the default DuckDB connection.

    Args:
        season_int: Season suffix interpolated into the table name.

    Returns:
        A DataFrame with ``player_id``, ``week``, ``pat_made`` and the
        ``fg_made_*`` distance-bucket columns, with NaNs replaced by ``0.0``.
    """
    # The SQL text is kept flush-left so the string sent to DuckDB is unchanged.
    kicking_query = f"""SELECT
player_id
, week
, pat_made
, fg_made_0_19
, fg_made_20_29
, fg_made_30_39
, fg_made_40_49
, fg_made_50_59
, fg_made_60_
from player_stats_player_stats_kicking_{season_int}"""
    kicking_stats = duckdb.sql(kicking_query).df()
    # The fill applies to every selected column, not only the stat columns.
    return kicking_stats.fillna(0.0)
def get_team_defense_stats(season_int: int | str = SEASON) -> pd.DataFrame:
    """Return defensive stats summed per team and week for a season.

    Reads the DuckDB relation ``player_stats_player_stats_def_<season>``,
    which must already be registered on the default DuckDB connection.

    Args:
        season_int: Season suffix interpolated into the table name.

    Returns:
        One row per (week, team) with summed sacks, interceptions, defensive
        touchdowns, opponent fumble recoveries and safeties.
    """
    # The SQL text is kept flush-left so the string sent to DuckDB is unchanged.
    defense_query = f"""
select
week
, team
, sum(def_sacks) as def_sacks
, sum(def_interceptions) as def_interceptions
, sum(def_tds) as def_tds
, sum(def_fumble_recovery_opp) as def_fumble_recovery_opp
, sum(def_safety) as def_safety
from player_stats_player_stats_def_{season_int}
group by week, team
"""
    return duckdb.sql(defense_query).df()
# (release tag, asset file name) pairs for the configured SEASON. Each pair is
# turned into an NflVerseDataAsset and registered with DuckDB by load_assets().
NFLVERSE_ASSETS: list[tuple[str, str]] = [
    ("weekly_rosters", f"roster_weekly_{SEASON}.parquet"),
    ("player_stats", f"player_stats_{SEASON}.parquet"),
    ("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
    ("player_stats", f"player_stats_def_{SEASON}.parquet"),
]
class NflVerseDataAsset:
    """A single parquet asset from an nflverse-data release.

    The asset is downloaded into a pandas DataFrame, optionally transformed,
    and registered with DuckDB under ``table_name``.
    """

    def __init__(
        self,
        release_tag: str,
        asset_name: str,
        dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
    ):
        """Store the asset coordinates and derive its DuckDB table name.

        Args:
            release_tag: Release tag segment of the download URL.
            asset_name: File name of the asset within that release.
            dataframe_mutation_fxn: Transformation applied to the loaded
                DataFrame before registration; defaults to the identity.
        """
        self.release_tag = release_tag
        self.asset_name = asset_name
        self.dataframe_mutation_fxn = dataframe_mutation_fxn
        # Table name is "<release_tag>_<asset name without its last extension>".
        self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}"

    @staticmethod
    def _build_url(base_url: str, *segments: str) -> str:
        """Join URL segments with exactly one forward slash between them."""
        # URLs always use "/", so os.path.join is the wrong tool here: it
        # inserts backslashes on Windows and discards earlier parts when a
        # later segment starts with a slash.
        parts = [base_url.rstrip("/")]
        parts.extend(segment.strip("/") for segment in segments)
        return "/".join(parts)

    def load_parquet_asset_to_df(self) -> pd.DataFrame:
        """Read the asset's parquet file from its release URL.

        Returns:
            The parquet contents as a DataFrame, without the mutation applied.
        """
        location = self._build_url(BASE_URL, self.release_tag, self.asset_name)
        return pd.read_parquet(location)

    def register_asset_to_duckdb(self) -> None:
        """Load the asset, apply the mutation, and register it with DuckDB."""
        df = self.dataframe_mutation_fxn(self.load_parquet_asset_to_df())
        duckdb.register(self.table_name, df)
@st.cache_data(ttl=60 * 60 * 24)
def load_assets():
    """Download every asset in NFLVERSE_ASSETS and register it with DuckDB.

    Wrapped in ``st.cache_data`` with a ttl of ``60 * 60 * 24``, so the
    body is skipped while a cached result is still valid.
    """
    for release_tag, file_name in NFLVERSE_ASSETS:
        NflVerseDataAsset(release_tag, file_name).register_asset_to_duckdb()
def get_current_tables() -> list[str]:
    """Return the ``name`` column of DuckDB's ``SHOW TABLES`` output as a list."""
    return duckdb.sql("SHOW TABLES").df()["name"].tolist()