Jon Solow committed on
Commit
958ade1
·
1 Parent(s): c412d07

Add fix for players with no team in player news

Browse files
src/queries/nbcsports/player_news.py CHANGED
@@ -14,12 +14,19 @@ def get_text_from_find_all(soup, element: str, find_search_map: Mapping[str, str
14
  return text_list
15
 
16
 
 
 
 
 
 
 
 
17
  def get_nfl_player_news(page_number: int = 1) -> pd.DataFrame:
18
  url = f"{NEWS_URL}?p={page_number}"
19
  request_page = requests.get(url)
20
  soup = BeautifulSoup(request_page.content)
21
  player_names_list = get_text_from_find_all(soup, "div", {"class": "PlayerNewsPost-name"})
22
- team_abbr_list = get_text_from_find_all(soup, "span", {"class": "PlayerNewsPost-team-abbr"})
23
  position_list = get_text_from_find_all(soup, "span", {"class": "PlayerNewsPost-position"})
24
  headline_list = get_text_from_find_all(soup, "div", {"class": "PlayerNewsPost-headline"})
25
  analysis_list = get_text_from_find_all(soup, "div", {"class": "PlayerNewsPost-analysis"})
 
14
  return text_list
15
 
16
 
17
def get_team_names(soup):
    """Collect one upper-cased team abbreviation per team container.

    Looks up every ``div`` with class ``PlayerNewsPost-team`` in ``soup`` and,
    inside each one, the first ``span`` with class
    ``PlayerNewsPost-team-abbr``. A container that has no such span
    contributes an empty string, so the result always has exactly one entry
    per container.

    Args:
        soup: Parsed page exposing ``find_all`` (presumably a BeautifulSoup
            object -- confirm against the caller).

    Returns:
        List of abbreviation strings, in the order ``find_all`` returned the
        containers.
    """
    team_abbrs = []
    for team_div in soup.find_all("div", "PlayerNewsPost-team"):
        abbr_span = team_div.find("span", {"class": "PlayerNewsPost-team-abbr"})
        # A player with no team has no abbreviation span; emit a placeholder
        # instead of dropping the entry (presumably so this list stays aligned
        # with the other per-post lists -- verify against the caller).
        team_abbrs.append(abbr_span.text.upper() if abbr_span else "")
    return team_abbrs
22
+
23
+
24
  def get_nfl_player_news(page_number: int = 1) -> pd.DataFrame:
25
  url = f"{NEWS_URL}?p={page_number}"
26
  request_page = requests.get(url)
27
  soup = BeautifulSoup(request_page.content)
28
  player_names_list = get_text_from_find_all(soup, "div", {"class": "PlayerNewsPost-name"})
29
+ team_abbr_list = get_team_names(soup)
30
  position_list = get_text_from_find_all(soup, "span", {"class": "PlayerNewsPost-position"})
31
  headline_list = get_text_from_find_all(soup, "div", {"class": "PlayerNewsPost-headline"})
32
  analysis_list = get_text_from_find_all(soup, "div", {"class": "PlayerNewsPost-analysis"})