Spaces:
Runtime error
Runtime error
from CrawDag.models import TaskHandle, DataExchange, News | |
from CrawDag.crawling import CrawlingTask | |
from .ScrapeMethod import ScrapeArticle | |
from .Scraper import Scraper | |
class ScrapingTask(TaskHandle):
    """Task that scrapes the news items published by the crawling task.

    The task reads the JSON items stored under ``CrawlingTask.key``, turns
    them into ``News`` objects, runs them through ``ScrapeArticle`` and
    publishes the result as JSON under ``ScrapingTask.key``.
    """

    # Set by ``__init__``. It is stored on the class rather than the instance;
    # presumably so that downstream tasks can read ``ScrapingTask.task_ids``
    # the same way this task reads ``CrawlingTask.task_ids`` -- TODO confirm.
    task_ids = None

    # Key under which the scraped news is pushed to the data exchange.
    key = 'scrape_news'

    def __init__(self, task_ids: str) -> None:
        """Initialise the base handle and record ``task_ids`` on the class.

        Args:
            task_ids: Identifier stored in ``ScrapingTask.task_ids``.
        """
        super().__init__()
        # Deliberately a class-level assignment: every instance shares it.
        ScrapingTask.task_ids = task_ids

    def execute(self, **context: any):
        """Pull the crawled news, scrape it and push the scraped result.

        Args:
            **context: Keyword context; must contain ``'ti'``, which is handed
                to ``DataExchange`` (presumably the Airflow task instance --
                verify against the caller).

        Raises:
            KeyError: If ``'ti'`` is missing from ``context``.
        """
        exchange = DataExchange(context['ti'])

        # Input: whatever the crawling task stored under its own id and key.
        crawled_payloads = exchange.pull(CrawlingTask.task_ids, CrawlingTask.key)
        crawled_news = list(map(News.from_json, crawled_payloads))

        scraped_news: list[News] = ScrapeArticle(crawled_news).scrape()

        # Output: serialise back to JSON before handing to the exchange.
        scraped_payloads = [item.to_json() for item in scraped_news]
        exchange.push(ScrapingTask.key, scraped_payloads)