Spaces:
Runtime error
Runtime error
from scrapy import Request | |
from ....configs import PTT_BOARD, COOKIES | |
from typing import Callable, List, Optional | |
def fetch_ptt_boards(
    boards_list: List[str],
    callback: Callable,
    index_from: Optional[str] = None,
    index_to: Optional[str] = None,
):
    """Generate scrapy Requests for the html index pages of PTT boards.

    When both ``index_from`` and ``index_to`` are given, one Request is
    yielded per board for every index in the inclusive range
    ``index_from..index_to``. Otherwise (including when only one of the two
    bounds is given) a single Request per board is yielded, with the URL
    built from an empty index.

    Args:
        boards_list: the boards to fetch.
        callback: a scrapy parse function attached to every Request.
        index_from: the starting html index (inclusive), as an integer string.
        index_to: the ending html index (inclusive), as an integer string.

    Yields:
        A scrapy Request for each (board, index) pair, sent with ``COOKIES``.

    Raises:
        ValueError: if ``index_from`` is greater than ``index_to``, or if
            either bound cannot be converted with ``int()``. Because this is
            a generator, the error surfaces on first iteration, not at call
            time.
    """
    # Validate and convert the bounds once, before touching any board: the
    # range does not depend on the board, and a bad range should be reported
    # even when ``boards_list`` is empty.
    if index_from is not None and index_to is not None:
        first, last = int(index_from), int(index_to)
        if first > last:
            raise ValueError(
                "the value of `index_from` cannot be greater than `index_to`."
            )
        indexes = range(first, last + 1)
    else:
        # No complete range: format the URL with an empty index instead.
        indexes = ("",)

    for board in boards_list:
        for index in indexes:
            url = PTT_BOARD.format(board, index)
            yield Request(url, cookies=COOKIES, callback=callback)