Eurosmart's picture
.
2700879
raw
history blame contribute delete
776 Bytes
from CrawDag.models import TaskHandle, DataExchange, News
from CrawDag.scraping import ScrapingTask
from CrawDag.saving.SavingMethod import MongoDataLake
from .DataLake import DataLake
class SavingTask(TaskHandle):
task_ids = None
key = 'scrape_news'
def __init__(self, task_ids: str) -> None:
super().__init__()
SavingTask.task_ids = task_ids
self.dataLake: DataLake = MongoDataLake()
def execute(self, **context: any):
dataExchange = DataExchange(context['ti'])
listNewsJson = dataExchange.pull(ScrapingTask.task_ids, ScrapingTask.key)
listNews = [News.from_json(newsJson) for newsJson in listNewsJson]
listNewsId = self.dataLake.save(listNews)
dataExchange.push(SavingTask.key, listNewsId)