import datetime
from typing import Mapping

import pandas as pd
import requests
from bs4 import BeautifulSoup

NEWS_URL = "https://www.nbcsports.com/fantasy/football/player-news"


def find_soup_text_with_default(soup, element: str, find_search_map: Mapping[str, str]) -> str:
    """Return the stripped text of the first matching element, or "" when it is missing."""
    find_result = soup.find(element, find_search_map)
    if not find_result:
        return ""
    return find_result.text.strip()


def parse_player_div(player_div) -> dict:
    """Flatten one PlayerNewsPost <div> into a row of scalar fields."""
    # The timestamp lives in a data-date attribute rather than in the element
    # text, so it needs its own None-safe lookup instead of
    # find_soup_text_with_default (which would crash here if the div is absent).
    date_div = player_div.find("div", {"class": "PlayerNewsPost-date"})
    return {
        "Date/Time": date_div.get("data-date") if date_div else None,
        "Name": find_soup_text_with_default(player_div, "div", {"class": "PlayerNewsPost-name"}),
        "Team": find_soup_text_with_default(player_div, "span", {"class": "PlayerNewsPost-team-abbr"}).upper(),
        "Position": find_soup_text_with_default(player_div, "span", {"class": "PlayerNewsPost-position"}).title(),
        "Headline": find_soup_text_with_default(player_div, "div", {"class": "PlayerNewsPost-headline"}),
        "Analysis": find_soup_text_with_default(player_div, "div", {"class": "PlayerNewsPost-analysis"}),
    }


def get_nfl_player_news(page_number: int = 1) -> pd.DataFrame:
    """Scrape one page of player news; returns an empty DataFrame when no posts are found."""
    url = f"{NEWS_URL}?p={page_number}"
    # A timeout keeps the scraper from hanging indefinitely on a stalled request.
    request_page = requests.get(url, timeout=30)
    request_page.raise_for_status()
    soup = BeautifulSoup(request_page.content, "html.parser")
    player_div_list = soup.find_all("div", {"class": "PlayerNewsPost"})
    if not player_div_list:
        return pd.DataFrame()
    parsed_player_list = [parse_player_div(d) for d in player_div_list]
    df = pd.DataFrame(parsed_player_list)
    # utc=True normalizes the per-post offsets so these timestamps compare
    # cleanly against timezone-aware datetimes downstream.
    df["Date/Time"] = pd.to_datetime(df["Date/Time"], utc=True)
    return df


def get_player_news_window_hours(hours: int = 1) -> pd.DataFrame:
    """Collect news pages until a post older than `hours` hours ago appears, then trim to the window."""
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=hours)
    page = 1
    max_pages = 20
    date_reached = False
    df_list = []
    # Pages are ordered newest-first, so keep fetching until the oldest post
    # on a page crosses the cutoff (or we hit the page cap).
    while page <= max_pages and not date_reached:
        last_news = get_nfl_player_news(page)
        if last_news.empty:
            break  # ran out of pages before reaching the cutoff
        df_list.append(last_news)
        date_reached = last_news["Date/Time"].min() < cutoff
        page += 1
    if not df_list:
        return pd.DataFrame()
    df = pd.concat(df_list, ignore_index=True)
    # The last page fetched usually reaches past the window, so drop the older rows.
    return df[df["Date/Time"] >= cutoff]
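

# Minimal usage sketch (my addition, not part of the original module): pull the
# last six hours of news and print a few columns. Requires network access and
# assumes NBC Sports still serves the PlayerNewsPost-* markup used above.
if __name__ == "__main__":
    recent_news = get_player_news_window_hours(hours=6)
    print(recent_news[["Date/Time", "Name", "Team", "Headline"]].head())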