Spaces:
Runtime error
Runtime error
File size: 774 Bytes
2700879 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
from CrawDag.scraping.Scraper import Scraper
from CrawDag.models import News
from bs4 import BeautifulSoup
import requests
class ScrapeBasic(Scraper):
def __init__(self, listNews: list[News]) -> None:
self.listNews = listNews
def scrape(self) -> list[News]:
newsList: list[News] = []
for news in self.listNews:
response = requests.get(news.link, verify=False)
soup = BeautifulSoup(response.content, 'html.parser')
paragraphs = soup.find_all('p')
content = ' '.join([para.get_text() for para in paragraphs[0:-1]])
html = soup.find('article')
news.content = content.strip()
news.html = html
newsList.append(news)
return newsList
|