Spaces:
Sleeping
Sleeping
File size: 4,692 Bytes
d916065 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# Natural Language Toolkit: Twitter API
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Ewan Klein <[email protected]>
# Lorenzo Rubio <[email protected]>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
This module provides an interface for TweetHandlers, and support for timezone
handling.
"""
import time as _time
from abc import ABCMeta, abstractmethod
from datetime import datetime, timedelta, timezone, tzinfo
class LocalTimezoneOffsetWithUTC(tzinfo):
    """
    Minimal ``tzinfo`` exposing the local machine's offset from UTC.

    This is deliberately not a general purpose local-timezone class:

    * it is meant to be attached to dates built with
      `datetime(..., tzinfo=Local)`, where `Local` is an instance of
      `LocalTimezoneOffsetWithUTC`;
    * its only job is to supply the offset with UTC so that such dates can
      be compared with other timezone-aware dates.

    Only `utcoffset` is overridden.

    Reference: https://docs.python.org/3/library/datetime.html
    """

    # Offset of the local non-DST timezone (`time.timezone` counts seconds
    # *west* of UTC, hence the sign flip).
    STDOFFSET = timedelta(seconds=-_time.timezone)
    # Offset of the local DST timezone when one is defined, otherwise the
    # standard offset.
    DSTOFFSET = timedelta(seconds=-_time.altzone) if _time.daylight else STDOFFSET

    def utcoffset(self, dt):
        """
        Return the offset from UTC as a `timedelta`.

        :param dt: The date being queried. It is ignored: `DSTOFFSET` is
            returned whatever the date is.
        """
        return self.DSTOFFSET
# Shared tzinfo instance; TweetHandlerI attaches it to the date limits it
# builds so they can be compared with timezone-aware Tweet dates.
LOCAL = LocalTimezoneOffsetWithUTC()
class BasicTweetHandler(metaclass=ABCMeta):
    """
    Bare-bones `TweetHandler`.

    Holds a Tweet counter together with a limit and a stop flag, and uses
    them to tell the client whether it should keep fetching. The counter is
    initialised to zero here; this class never increments it itself.
    """

    def __init__(self, limit=20):
        """
        :param int limit: Value the counter must stay below for fetching to
            continue.
        """
        self.limit = limit
        self.counter = 0

        # A flag to indicate to the client whether to stop fetching data
        # given some condition (e.g., reaching a date limit).
        self.do_stop = False

        # Stores the id of the last fetched Tweet to handle pagination.
        self.max_id = None

    def do_continue(self):
        """
        Tell the client whether to carry on fetching Tweets.

        :return: `False` once the counter has reached the limit or the stop
            flag has been raised, `True` otherwise.
        """
        if not self.counter < self.limit:
            return False
        return not self.do_stop
class TweetHandlerI(BasicTweetHandler):
    """
    Interface class whose subclasses should implement a handle method that
    Twitter clients can delegate to.
    """

    def __init__(self, limit=20, upper_date_limit=None, lower_date_limit=None):
        """
        :param int limit: The number of data items to process in the current\
        round of processing.

        :param tuple upper_date_limit: The latest Tweet date to accept; a\
        Tweet created after it stops the collection. This should be entered\
        as a tuple which can serve as the argument to `datetime.datetime`\
        and is interpreted as local time.\
        E.g. `upper_date_limit=(2015, 4, 1, 12, 40)` for 12:40 pm on\
        April 1 2015.

        :param tuple lower_date_limit: The earliest Tweet date to accept; a\
        Tweet created before it stops the collection.\
        See `upper_date_limit` for formatting.
        """
        BasicTweetHandler.__init__(self, limit)

        # Limits are stored as timezone-aware datetimes (or None when unset)
        # so they can be compared with the UTC-aware Tweet dates built in
        # `check_date_limit`.
        self.upper_date_limit = None
        self.lower_date_limit = None
        if upper_date_limit:
            self.upper_date_limit = datetime(*upper_date_limit, tzinfo=LOCAL)
        if lower_date_limit:
            self.lower_date_limit = datetime(*lower_date_limit, tzinfo=LOCAL)

        # Initialised to True and never read in this class; presumably
        # consumed by subclasses -- confirm against them.
        self.startingup = True

    @abstractmethod
    def handle(self, data):
        """
        Deal appropriately with data returned by the Twitter API
        """

    @abstractmethod
    def on_finish(self):
        """
        Actions when the tweet limit has been reached
        """

    def check_date_limit(self, data, verbose=False):
        """
        Validate date limits.

        Parses `data["created_at"]` as a UTC date and sets `self.do_stop`
        to `True` when it is later than the upper limit or earlier than the
        lower limit. Does nothing when no limit is configured.

        :param dict data: A Tweet; only its `"created_at"` entry is read,\
        and only when at least one limit is set.
        :param bool verbose: If `True`, print which limit was crossed.
        :raises KeyError: if a limit is set and `data` has no\
        `"created_at"` entry.
        :raises ValueError: if `"created_at"` does not match the expected\
        `"%a %b %d %H:%M:%S +0000 %Y"` format.
        """
        if not (self.upper_date_limit or self.lower_date_limit):
            return

        date_fmt = "%a %b %d %H:%M:%S +0000 %Y"
        tweet_date = datetime.strptime(data["created_at"], date_fmt).replace(
            tzinfo=timezone.utc
        )

        # Report the limit that was actually crossed. Keying the message on
        # whether an upper limit merely exists would mislabel a lower-limit
        # violation whenever both limits are configured.
        if self.upper_date_limit and tweet_date > self.upper_date_limit:
            message = "earlier"
            date_limit = self.upper_date_limit
        elif self.lower_date_limit and tweet_date < self.lower_date_limit:
            message = "later"
            date_limit = self.lower_date_limit
        else:
            # Tweet lies within the configured limits: nothing to do.
            return

        if verbose:
            print(
                "Date limit {} is {} than date of current tweet {}".format(
                    date_limit, message, tweet_date
                )
            )
        self.do_stop = True
|