File size: 765 Bytes
2700879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from CrawDag.models import TaskHandle, DataExchange, News
from CrawDag.crawling import CrawlingTask
from .ScrapeMethod import ScrapeArticle
from .Scraper import Scraper
class ScrapingTask(TaskHandle):
    task_ids = None
    key = 'scrape_news'

    def __init__(self, task_ids: str) -> None:
        super().__init__()
        ScrapingTask.task_ids = task_ids

    def execute(self, **context: any):
        dataExchange = DataExchange(context['ti'])
        listNewsJson = dataExchange.pull(CrawlingTask.task_ids, CrawlingTask.key)
        listNews = [News.from_json(newsJson) for newsJson in listNewsJson]

        newsList:list[News] = ScrapeArticle(listNews).scrape()
        dataExchange.push(ScrapingTask.key, [news.to_json() for news in newsList])