File size: 776 Bytes
2700879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from typing import Any

from CrawDag.models import TaskHandle, DataExchange, News
from CrawDag.scraping import ScrapingTask
from CrawDag.saving.SavingMethod import MongoDataLake
from .DataLake import DataLake
class SavingTask(TaskHandle):
    """Persist scraped news and publish the result of the save.

    ``execute`` pulls the payload published by ``ScrapingTask`` through a
    ``DataExchange``, rebuilds ``News`` objects from it, hands them to the
    configured ``DataLake`` and pushes whatever ``save`` returns under
    ``SavingTask.key``.
    """

    # Assigned by __init__ on the class itself (not on the instance), so the
    # most recently constructed SavingTask determines the value everyone sees.
    task_ids = None
    # Key under which execute() pushes the value returned by DataLake.save().
    # NOTE(review): the value reads 'scrape_news' although this is the saving
    # step -- confirm it is intentional and does not clash with
    # ScrapingTask.key.
    key = 'scrape_news'

    def __init__(self, task_ids: str) -> None:
        """Record the task id at class level and create the storage backend.

        Args:
            task_ids: Identifier stored on ``SavingTask.task_ids``.
        """
        super().__init__()
        # Written to the class attribute, mirroring how execute() reads
        # ScrapingTask.task_ids directly from the class.
        SavingTask.task_ids = task_ids
        self.dataLake: DataLake = MongoDataLake()

    def execute(self, **context: Any) -> None:
        """Pull the scraped news, save it, and push the save result.

        Args:
            **context: Keyword arguments supplied by the caller; must contain
                a ``'ti'`` entry, which is handed to ``DataExchange``.

        Raises:
            KeyError: If ``context`` has no ``'ti'`` entry.
        """
        data_exchange = DataExchange(context['ti'])

        # NOTE(review): the pulled value is iterated as-is; if pull() can
        # return None (nothing published upstream) this raises TypeError --
        # confirm the scraping task always pushes an iterable.
        news_payloads = data_exchange.pull(ScrapingTask.task_ids, ScrapingTask.key)
        news_items = [News.from_json(payload) for payload in news_payloads]

        # presumably save() returns the ids of the stored news -- verify
        # against the DataLake implementation.
        saved_ids = self.dataLake.save(news_items)
        data_exchange.push(SavingTask.key, saved_ids)