File size: 4,692 Bytes
d916065
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Natural Language Toolkit: Twitter API
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Ewan Klein <[email protected]>
#         Lorenzo Rubio <[email protected]>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""

This module provides an interface for TweetHandlers, and support for timezone
handling.

"""

import time as _time
from abc import ABCMeta, abstractmethod
from datetime import datetime, timedelta, timezone, tzinfo


class LocalTimezoneOffsetWithUTC(tzinfo):
    """
    Minimal `tzinfo` that reports the local machine's offset from UTC.

    This is deliberately not a general purpose local-timezone class:

    * it expects to be attached to dates created with
      `datetime(..., tzinfo=Local)`, where `Local` is an instance of
      `LocalTimezoneOffsetWithUTC`;
    * for such dates it only supplies the offset from UTC, which is what is
      needed to compare them with other timezone-aware dates.

    Note that `utcoffset` ignores the date it is given and always answers
    with `DSTOFFSET`; `dst` and `tzname` are not overridden here.

    Reference: https://docs.python.org/3/library/datetime.html
    """

    # Offset of the standard (non-DST) local zone. `time.timezone` is
    # expressed in seconds west of UTC, hence the sign flip.
    STDOFFSET = timedelta(seconds=-_time.timezone)

    # Offset of the DST local zone when one is defined; otherwise identical
    # to the standard offset.
    DSTOFFSET = timedelta(seconds=-_time.altzone) if _time.daylight else STDOFFSET

    def utcoffset(self, dt):
        """
        Return the offset from UTC as a `timedelta`.

        :param dt: Unused; the same class-level offset is returned whatever
            date is passed in.
        """
        return self.DSTOFFSET


# Shared module-level tzinfo instance; TweetHandlerI attaches it to the date
# limits it builds from caller-supplied tuples.
LOCAL = LocalTimezoneOffsetWithUTC()


class BasicTweetHandler(metaclass=ABCMeta):
    """
    Bare-bones implementation of `TweetHandler`.

    Holds a Tweet counter and a limit, and uses them (together with a stop
    flag) to decide when the client should stop fetching Tweets.
    """

    def __init__(self, limit=20):
        """
        :param int limit: Counter value at which `do_continue` starts
            returning `False`.
        """
        self.limit, self.counter = limit, 0

        # Flag telling the client whether to stop fetching data given some
        # condition (e.g., reaching a date limit).
        self.do_stop = False

        # Id of the last fetched Tweet, kept to handle pagination.
        self.max_id = None

    def do_continue(self):
        """
        Tell the client whether it may keep fetching Tweets.

        :return: `False` once the counter has reached the limit or the stop
            flag has been raised.
        """
        below_limit = self.counter < self.limit
        return below_limit and not self.do_stop


class TweetHandlerI(BasicTweetHandler):
    """
    Interface class whose subclasses should implement a handle method that
    Twitter clients can delegate to.
    """

    def __init__(self, limit=20, upper_date_limit=None, lower_date_limit=None):
        """
        :param int limit: The number of data items to process in the current
            round of processing.

        :param tuple upper_date_limit: The date at which to stop collecting
            new data. This should be entered as a tuple which can serve as the
            argument to `datetime.datetime`.
            E.g. `upper_date_limit=(2015, 4, 1, 12, 40)` for 12:40 pm on
            April 1 2015. The tuple is interpreted in the local timezone.

        :param tuple lower_date_limit: The date at which to stop collecting
            new data. See `upper_date_limit` for formatting.
        """
        BasicTweetHandler.__init__(self, limit)

        # Limits are stored as timezone-aware datetimes (or None when unset)
        # so they can be compared with the UTC dates parsed from Tweets.
        self.upper_date_limit = None
        self.lower_date_limit = None
        if upper_date_limit:
            self.upper_date_limit = datetime(*upper_date_limit, tzinfo=LOCAL)
        if lower_date_limit:
            self.lower_date_limit = datetime(*lower_date_limit, tzinfo=LOCAL)

        self.startingup = True

    @abstractmethod
    def handle(self, data):
        """
        Deal appropriately with data returned by the Twitter API
        """

    @abstractmethod
    def on_finish(self):
        """
        Actions when the tweet limit has been reached
        """

    def check_date_limit(self, data, verbose=False):
        """
        Validate date limits.

        Parses `data["created_at"]` as a UTC date and sets `self.do_stop` to
        `True` when that date is later than `upper_date_limit` or earlier
        than `lower_date_limit`. Does nothing when no limit is configured.

        :param dict data: Tweet data; only the `"created_at"` entry is read,
            and it must match the format `"%a %b %d %H:%M:%S +0000 %Y"`.
        :param bool verbose: If `True`, print which limit was crossed.
        :raises KeyError: if limits are set and `data` has no `"created_at"`.
        :raises ValueError: if `"created_at"` does not match the format.
        """
        if not (self.upper_date_limit or self.lower_date_limit):
            return

        date_fmt = "%a %b %d %H:%M:%S +0000 %Y"
        tweet_date = datetime.strptime(data["created_at"], date_fmt).replace(
            tzinfo=timezone.utc
        )

        # Report the limit that was actually crossed: when both limits are
        # configured, a Tweet older than the lower limit must not be
        # described in terms of the upper one.
        if self.upper_date_limit and tweet_date > self.upper_date_limit:
            message = "earlier"
            date_limit = self.upper_date_limit
        elif self.lower_date_limit and tweet_date < self.lower_date_limit:
            message = "later"
            date_limit = self.lower_date_limit
        else:
            # Tweet date lies within the configured bounds.
            return

        if verbose:
            print(
                "Date limit {} is {} than date of current tweet {}".format(
                    date_limit, message, tweet_date
                )
            )
        self.do_stop = True