Spaces:
Runtime error
Runtime error
from pathlib import Path | |
from io import BufferedWriter | |
from datetime import datetime | |
from ..spiders import PttSpider | |
from typing import Any, Dict, Tuple | |
from scrapy.exporters import BaseItemExporter, CsvItemExporter | |
class CsvPipeline: | |
""" | |
The CsvPipeline object writes the scraped item to csv. | |
""" | |
def open_spider(self, spider: PttSpider) -> None: | |
self.exporters_list: Dict[str, Tuple[BaseItemExporter, BufferedWriter]] = {} | |
def _exporter_for_item( | |
self, item: Dict[str, Any], spider: PttSpider | |
) -> CsvItemExporter: | |
data_dir = spider.data_dir | |
board = item.pop("board") | |
date = datetime.strptime(item["date"], "%Y-%m-%d %H:%M:%S") | |
year = date.year | |
month = date.month | |
dir_path = f"{data_dir}/{board}/{year}" | |
file_path = f"{dir_path}/{board}_{year}_{month}" | |
Path(dir_path).mkdir(parents=True, exist_ok=True) | |
if file_path not in self.exporters_list: | |
file = open(f"{file_path}.csv", "wb") | |
exporter = CsvItemExporter(file) | |
exporter.start_exporting() | |
self.exporters_list[file_path] = (exporter, file) | |
return self.exporters_list[file_path][0] | |
def process_item(self, item: Dict[str, Any], spider: PttSpider) -> Dict[str, Any]: | |
exporter = self._exporter_for_item(item, spider) | |
exporter.export_item(item) | |
return item | |
def close_spider(self, spider: PttSpider) -> None: | |
for exporter, csv_file in self.exporters_list.values(): | |
exporter.finish_exporting() | |
csv_file.close() | |