import logging

from newspaper import Article, ArticleException

from CrawDag.models import News
from CrawDag.scraping.Scraper import Scraper

logger = logging.getLogger(__name__)


class ScrapeArticle(Scraper):
    """Fill in the body text and raw HTML of news items using newspaper.

    Each item's ``link`` is downloaded and parsed with
    ``newspaper.Article``; items whose extracted text is too short are
    left out of the result.
    """

    # Extracted content must be strictly longer than this many characters
    # for the item to be kept.
    MIN_CONTENT_LENGTH = 10

    def __init__(self, listNews: list[News]) -> None:
        """Pass the news items to scrape on to the ``Scraper`` base class."""
        super().__init__(listNews)

    def scrape(self) -> list[News]:
        """Download and parse every item in ``self.listNews``.

        For each item, ``news.content`` is set to the extracted article
        text without its final line, and ``news.html`` to the downloaded
        HTML. Items are modified in place.

        An item whose download or parse raises ``ArticleException`` is
        logged and skipped, so one bad link does not abort the batch.

        Returns:
            The items whose content is longer than ``MIN_CONTENT_LENGTH``
            characters, in their original order.
        """
        scraped: list[News] = []
        for news in self.listNews:
            article = Article(news.link)
            try:
                article.download()
                article.parse()
            except ArticleException:
                logger.warning(
                    "Skipping %s: download or parse failed",
                    news.link,
                    exc_info=True,
                )
                continue

            # The last line of the extracted text is always discarded.
            # NOTE(review): presumably a trailing byline/credit line -
            # confirm. A text with no newline therefore yields ''.
            lines = article.text.split('\n')
            news.content = '\n'.join(lines[:-1]).strip()
            news.html = article.html

            # An empty string has length 0, so the length check alone also
            # rejects empty content.
            if len(news.content) > self.MIN_CONTENT_LENGTH:
                scraped.append(news)
        return scraped