Eurosmart's picture
.
2700879
raw
history blame contribute delete
765 Bytes
from CrawDag.models import TaskHandle, DataExchange, News
from CrawDag.crawling import CrawlingTask
from .ScrapeMethod import ScrapeArticle
from .Scraper import Scraper
class ScrapingTask(TaskHandle):
task_ids = None
key = 'scrape_news'
def __init__(self, task_ids: str) -> None:
super().__init__()
ScrapingTask.task_ids = task_ids
def execute(self, **context: any):
dataExchange = DataExchange(context['ti'])
listNewsJson = dataExchange.pull(CrawlingTask.task_ids, CrawlingTask.key)
listNews = [News.from_json(newsJson) for newsJson in listNewsJson]
newsList:list[News] = ScrapeArticle(listNews).scrape()
dataExchange.push(ScrapingTask.key, [news.to_json() for news in newsList])