File size: 1,200 Bytes
21e639d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from scrapy import Request
from ....configs import PTT_BOARD, COOKIES
from typing import Callable, List, Optional


def fetch_ptt_boards(
    boards_list: List[str],
    callback: Callable,
    index_from: Optional[str] = None,
    index_to: Optional[str] = None,
):
    """The fetch_ptt_boards function fetches the ptt boards htm indexes.

    Args:
        boards_list (list): a list of boards
        callback (Callable): a scrapy parse function
        index_from (str | None): the starting html index
        index_to (str | None): the ending html index
    Returns:
        a scrapy Request.
    """

    for board in boards_list:
        if index_from is not None and index_to is not None:
            if int(index_from) > int(index_to):
                raise ValueError(
                    "the value of `index_from` cannot be greater than `index_to`."
                )

            for index in range(int(index_from), int(index_to) + 1):
                url = PTT_BOARD.format(board, index)
                yield Request(url, cookies=COOKIES, callback=callback)
        else:
            url = PTT_BOARD.format(board, "")
            yield Request(url, cookies=COOKIES, callback=callback)