Jon Solow committed · 77fb55b
1 Parent(s): 1f1a2a2
Parse out team names of practice reports to fix bug where only opponent report exists
src/queries/nfl_teams/practice_reports.py
CHANGED
@@ -1,8 +1,10 @@
+from bs4 import BeautifulSoup
 import datetime
 from multiprocessing import Pool
 import numpy as np
 import pandas as pd
 from pydantic import BaseModel, Field
+import requests
 from typing import Optional
 from urllib.parse import urljoin
 
@@ -72,11 +74,23 @@ class PracticeReportRawRow(BaseModel):
         return cls(**{DAY_OF_WEEK_STRING_MAPPING.get(k, k): cls.replace_nan(v) for k, v in input_dict.items()})
 
 
+def get_injury_report_dataframe(team: NFLTeam):
+    injury_report_url = urljoin(team.injury_report_url, f"week/REG-{CURRENT_WEEK}")
+    report_request = requests.get(injury_report_url)
+    report_soup = BeautifulSoup(report_request.content)
+    team_names_spans = report_soup.find_all("span", {"class": "nfl-o-injury-report__club-name"})
+    assert team_names_spans
+    team_names_str = [x.get_text() for x in team_names_spans]
+    assert team_names_str[0] == team.team_full_name
+    tables = report_soup.find_all("table")
+    df_report = pd.read_html(str(tables))[0]
+    return df_report
+
+
 def scrape_team_injury_report(team: NFLTeam) -> pd.DataFrame:
     print(f"Scraping Injury Report for: {team.team_full_name}")
-    injury_report_url = urljoin(team.injury_report_url, f"week/REG-{CURRENT_WEEK}")
     try:
-        team_report =
+        team_report = get_injury_report_dataframe(team)
     except Exception:
         print(f"Failed to scrape practice report for: {team.team_full_name}")
         return pd.DataFrame()
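For context on the fix: the injury-report page carries one club-name span per report table, so when only the opponent has filed a report, the first table on the page is the opponent's. The new assertions appear to guard against that case by checking the first parsed club name against the requested team before reading the first table. Below is a minimal sketch of that guard in isolation; the helper name, team names, and table data are hypothetical and not taken from the repo.

# Sketch of the team-name guard (hypothetical names and data, not repo code).
import pandas as pd


def pick_team_table(club_names, tables, team_full_name):
    # One club-name span precedes each report table, in page order.
    # If the first club is not the requested team, that team's own report
    # is missing (only the opponent filed one), so raise instead of
    # silently returning the opponent's table.
    assert club_names, "no club names parsed from the page"
    assert club_names[0] == team_full_name, "first report belongs to the opponent"
    return tables[0]


# Only the opponent's report exists, so the guard raises and the caller
# falls back to an empty DataFrame, mirroring the except-branch in
# scrape_team_injury_report.
clubs = ["Kansas City Chiefs"]                       # hypothetical opponent
tables = [pd.DataFrame({"Player": ["A. Example"]})]  # hypothetical parsed table
try:
    report = pick_team_table(clubs, tables, "Las Vegas Raiders")
except Exception:
    report = pd.DataFrame()
print(report.empty)  # True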