Spaces:
Runtime error
Runtime error
from typing import Any

from CrawDag.models import TaskHandle, DataExchange, News
from CrawDag.scraping import ScrapingTask
from CrawDag.saving.SavingMethod import MongoDataLake
from .DataLake import DataLake
class SavingTask(TaskHandle):
    """Task that stores the scraped news through a data lake.

    ``execute`` pulls the JSON news items published by ``ScrapingTask`` via
    ``DataExchange``, rebuilds ``News`` objects from them, hands them to the
    configured ``DataLake`` and pushes whatever ``save`` returns under
    ``SavingTask.key``.
    """

    # Task id passed to the constructor. It is stored on the class rather than
    # on the instance, the same way ``execute`` reads ``ScrapingTask.task_ids``.
    task_ids = None
    # Key under which ``execute`` pushes the result of the save.
    # NOTE(review): the value says 'scrape_news' although this is the saving
    # task -- confirm it is intentional and not copied from the scraping task.
    key = 'scrape_news'

    def __init__(self, task_ids: str) -> None:
        """Register the task id and create the data lake backend.

        Args:
            task_ids: Identifier of this task; written to the class attribute
                ``SavingTask.task_ids``, so the latest constructed instance
                determines the value seen by every reader.
        """
        super().__init__()
        SavingTask.task_ids = task_ids
        self.dataLake: DataLake = MongoDataLake()

    def execute(self, **context: Any):
        """Pull the scraped news, save them and publish the save result.

        Args:
            **context: Keyword arguments supplied by the caller; only
                ``context['ti']`` is read and it is passed to ``DataExchange``.
        """
        exchange = DataExchange(context['ti'])
        # Read what the scraping task published under its own id and key.
        news_json_items = exchange.pull(ScrapingTask.task_ids, ScrapingTask.key)
        news_items = [News.from_json(item) for item in news_json_items]
        # Presumably the identifiers of the stored news -- verify against
        # ``DataLake.save``.
        saved_ids = self.dataLake.save(news_items)
        exchange.push(SavingTask.key, saved_ids)