Spaces:
Sleeping
Sleeping
File size: 2,049 Bytes
c412d07 54505df c412d07 54505df a29184a 54505df 958ade1 c412d07 54505df c412d07 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
from bs4 import BeautifulSoup
import datetime
import pandas as pd
import requests
from typing import Mapping
# Listing page for NBC Sports NFL fantasy player-news posts.
# Pagination is driven by the ``p`` query parameter (see get_nfl_player_news).
NEWS_URL = "https://www.nbcsports.com/fantasy/football/player-news"
def find_soup_text_with_default(soup, element: str, find_search_map: Mapping[str, str]):
    """Return the stripped text of the first matching element, or "".

    Args:
        soup: BeautifulSoup tag/document to search within.
        element: tag name to look for (e.g. "div", "span").
        find_search_map: attribute filters forwarded to ``soup.find``.

    Returns:
        The matched element's whitespace-stripped text, or the empty
        string when nothing matches.
    """
    match = soup.find(element, find_search_map)
    return match.text.strip() if match else ""
def parse_player_div(player_div):
    """Extract one news record from a ``PlayerNewsPost`` div.

    Args:
        player_div: BeautifulSoup tag for a single "PlayerNewsPost" element.

    Returns:
        dict of column name -> scraped value. "Date/Time" holds the raw
        ``data-date`` attribute string (or None when the date element is
        missing); all other fields are stripped text, "" when absent.
    """
    # The original chained .find(...).get("data-date") directly, which
    # raised AttributeError whenever the date div was missing — every
    # other field already degrades gracefully, so this one should too.
    date_div = player_div.find("div", {"class": "PlayerNewsPost-date"})
    return {
        "Date/Time": date_div.get("data-date") if date_div else None,
        "Name": find_soup_text_with_default(player_div, "div", {"class": "PlayerNewsPost-name"}),
        "Team": find_soup_text_with_default(player_div, "span", {"class": "PlayerNewsPost-team-abbr"}).upper(),
        "Position": find_soup_text_with_default(player_div, "span", {"class": "PlayerNewsPost-position"}).title(),
        "Headline": find_soup_text_with_default(player_div, "div", {"class": "PlayerNewsPost-headline"}),
        "Analysis": find_soup_text_with_default(player_div, "div", {"class": "PlayerNewsPost-analysis"}),
    }
def get_nfl_player_news(page_number: int = 1) -> pd.DataFrame:
    """Scrape one page of NBC Sports NFL player news.

    Args:
        page_number: 1-based index of the news listing page.

    Returns:
        DataFrame with columns Date/Time (tz-aware datetime), Name, Team,
        Position, Headline, Analysis; an empty DataFrame when the page
        contains no news posts.

    Raises:
        requests.HTTPError: on a non-2xx HTTP response.
    """
    url = f"{NEWS_URL}?p={page_number}"
    # Timeout keeps a dead/unresponsive server from hanging the caller
    # forever; the original call had none.
    request_page = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of silently returning an empty
    # frame that is indistinguishable from "no news".
    request_page.raise_for_status()
    # Name the parser explicitly: avoids bs4's "no parser specified"
    # warning and keeps parsing consistent across environments.
    soup = BeautifulSoup(request_page.content, "html.parser")
    player_div_list = soup.find_all("div", {"class": "PlayerNewsPost"})
    if not player_div_list:
        return pd.DataFrame()
    parsed_player_list = [parse_player_div(d) for d in player_div_list]
    df = pd.DataFrame(parsed_player_list)
    df["Date/Time"] = pd.to_datetime(df["Date/Time"])
    return df
def get_player_news_window_hours(hours: int = 1):
    """Collect player news from the last *hours* hours (UTC).

    Pages through the listing until a post older than the cutoff is seen,
    an empty page is returned, or the page cap is reached.

    Args:
        hours: size of the look-back window in hours.

    Returns:
        DataFrame of all fetched rows (may extend slightly past the
        cutoff, since whole pages are appended); empty DataFrame when no
        news was fetched at all.
    """
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=hours)
    max_pages = 20  # hard cap so a far-past cutoff cannot page forever
    df_list = []
    # for-range fixes the original off-by-one: `while page < max_pages`
    # with page starting at 1 fetched at most 19 pages, not 20.
    for page in range(1, max_pages + 1):
        last_news = get_nfl_player_news(page)
        # An empty page means there is nothing further to fetch; the
        # original crashed here with ValueError (min() of empty sequence).
        if last_news.empty:
            break
        df_list.append(last_news)
        # NOTE(review): comparison assumes the scraped data-date strings
        # carry a timezone, so the parsed column is tz-aware like cutoff.
        if last_news["Date/Time"].min() < cutoff:
            break
    if not df_list:
        return pd.DataFrame()
    return pd.concat(df_list)
|