Jon Solow committed on
Commit
962e128
·
1 Parent(s): 4d4b05d

Add load data page using duckdb

Browse files
src/Home.py CHANGED
@@ -4,6 +4,8 @@ from config import DEFAULT_ICON, LEAGUE_NAME
4
  from login_component import get_authorization_button
5
  from page_selector import remove_seasonal_pages
6
 
 
 
7
 
8
  def get_app():
9
  keeper_title = f"{LEAGUE_NAME}"
@@ -24,4 +26,5 @@ def get_app():
24
 
25
 
26
  if __name__ == "__main__":
 
27
  get_app()
 
4
  from login_component import get_authorization_button
5
  from page_selector import remove_seasonal_pages
6
 
7
+ from queries.nflverse.github_data import load_assets
8
+
9
 
10
  def get_app():
11
  keeper_title = f"{LEAGUE_NAME}"
 
26
 
27
 
28
  if __name__ == "__main__":
29
+ load_assets()
30
  get_app()
src/pages/98_Load_Data.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import duckdb
2
+ import streamlit as st
3
+
4
+ from config import DEFAULT_ICON
5
+ from shared_page import common_page_config
6
+
7
+ from queries.nflverse.github_data import load_assets
8
+
9
+
10
+ def get_page():
11
+ page_title = "Data Loader"
12
+ st.set_page_config(page_title=page_title, page_icon=DEFAULT_ICON, layout="wide")
13
+ common_page_config()
14
+ st.title(page_title)
15
+
16
+ current_tables_df = duckdb.sql("SHOW TABLES").df()
17
+ current_tables_list = current_tables_df["name"].tolist()
18
+
19
+ if st.button("Refresh Data"):
20
+ load_assets()
21
+
22
+ if selected_table := st.selectbox("Describe a table:", current_tables_list, index=0):
23
+ describe_df = duckdb.sql(f"DESCRIBE {selected_table}").df()
24
+ st.dataframe(
25
+ describe_df,
26
+ hide_index=True,
27
+ use_container_width=True,
28
+ )
29
+
30
+
31
+ if __name__ == "__main__":
32
+ get_page()
src/queries/nflverse/github_data.py CHANGED
@@ -1,4 +1,10 @@
 
1
  import pandas as pd
 
 
 
 
 
2
 
3
 
4
  def get_parquet_github(season_int: int, parquet_prefix: str):
@@ -18,7 +24,7 @@ FANTASY_POSITIONS = [
18
 
19
 
20
  def get_snap_counts(season_int: int) -> pd.DataFrame:
21
- df = get_parquet_github(season_int, "snap_counts/snap_counts")
22
  df["fantasy_position"] = df["position"].isin(FANTASY_POSITIONS)
23
  return df
24
 
@@ -37,3 +43,70 @@ def get_ftn_charting(season_int: int) -> pd.DataFrame:
37
 
38
  def get_pbp_participation(season_int: int) -> pd.DataFrame:
39
  return get_parquet_github(season_int, "pbp_participation/pbp_participation")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import duckdb
2
  import pandas as pd
3
+ import os
4
+ from typing import Callable
5
+
6
+
7
+ BASE_URL = "https://github.com/nflverse/nflverse-data/releases/download/"
8
 
9
 
10
  def get_parquet_github(season_int: int, parquet_prefix: str):
 
24
 
25
 
26
  def get_snap_counts(season_int: int) -> pd.DataFrame:
27
+ df = duckdb.sql(f"SELECT * from snap_counts_snap_counts_{season_int}").df()
28
  df["fantasy_position"] = df["position"].isin(FANTASY_POSITIONS)
29
  return df
30
 
 
43
 
44
  def get_pbp_participation(season_int: int) -> pd.DataFrame:
45
  return get_parquet_github(season_int, "pbp_participation/pbp_participation")
46
+
47
+
48
+ SEASON = "2023"
49
+
50
+ NFLVERSE_ASSETS = [
51
+ ("ftn_charting", f"ftn_charting_{SEASON}.parquet"),
52
+ ("espn_data", "qbr_season_level.parquet"),
53
+ ("espn_data", "qbr_week_level.parquet"),
54
+ ("players", "players.parquet"),
55
+ ("pbp_participation", f"pbp_participation_{SEASON}.parquet"),
56
+ ("snap_counts", f"snap_counts_{SEASON}.parquet"),
57
+ ("player_stats", f"player_stats_{SEASON}.parquet"),
58
+ ("player_stats", f"player_stats_def_{SEASON}.parquet"),
59
+ ("player_stats", f"player_stats_kicking_{SEASON}.parquet"),
60
+ ("pfr_advstats", "advstats_season_def.parquet"),
61
+ ("pfr_advstats", "advstats_season_pass.parquet"),
62
+ ("pfr_advstats", "advstats_season_rec.parquet"),
63
+ ("pfr_advstats", "advstats_season_rush.parquet"),
64
+ ("pfr_advstats", f"advstats_week_def_{SEASON}.parquet"),
65
+ ("pfr_advstats", f"advstats_week_pass_{SEASON}.parquet"),
66
+ ("pfr_advstats", f"advstats_week_rec_{SEASON}.parquet"),
67
+ ("pfr_advstats", f"advstats_week_rush_{SEASON}.parquet"),
68
+ ("pbp", f"play_by_play_{SEASON}.parquet"),
69
+ ("nextgen_stats", "ngs_passing.parquet"),
70
+ ("nextgen_stats", "ngs_receiving.parquet"),
71
+ ("nextgen_stats", "ngs_rushing.parquet"),
72
+ ]
73
+
74
+
75
+ class NflVerseDataAsset:
76
+ def __init__(
77
+ self,
78
+ release_tag: str,
79
+ asset_name: str,
80
+ dataframe_mutation_fxn: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
81
+ ):
82
+ self.release_tag = release_tag
83
+ self.asset_name = asset_name
84
+ self.dataframe_mutation_fxn = dataframe_mutation_fxn
85
+ self.table_name = f"{release_tag}_{asset_name.rsplit('.', 1)[0]}"
86
+
87
+ def load_parquet_asset_to_df(self) -> pd.DataFrame:
88
+ location = os.path.join(BASE_URL, self.release_tag, self.asset_name)
89
+ df = pd.read_parquet(location)
90
+ return df
91
+
92
+ def register_asset_to_duckdb(self) -> None:
93
+ df = self.load_parquet_asset_to_df()
94
+ df = self.dataframe_mutation_fxn(df)
95
+ duckdb.register(self.table_name, df)
96
+
97
+
98
+ def load_assets():
99
+ for tag, asset in NFLVERSE_ASSETS:
100
+ asset = NflVerseDataAsset(tag, asset)
101
+ asset.register_asset_to_duckdb()
102
+
103
+
104
+ def main():
105
+ load_assets()
106
+ import pdb
107
+
108
+ pdb.set_trace()
109
+
110
+
111
+ if __name__ == "__main__":
112
+ main()