geored's picture
Upload folder using huggingface_hub
0a06673 verified
from datetime import datetime
from uuid import uuid4
import logging
import numbers
import atexit
from dateutil.tz import tzutc
from six import string_types
from analytics.utils import guess_timezone, clean
from analytics.consumer import Consumer
from analytics.request import post
from analytics.version import VERSION
try:
import queue
except ImportError:
import Queue as queue
ID_TYPES = (numbers.Number, string_types)
class Client(object):
class DefaultConfig(object):
write_key = None
host = None
on_error = None
debug = False
send = True
sync_mode = False
max_queue_size = 10000
gzip = False
timeout = 15
max_retries = 10
proxies = None
thread = 1
upload_interval = 0.5
upload_size = 100
max_retries = 10
"""Create a new Segment client."""
log = logging.getLogger('segment')
def __init__(self,
write_key=DefaultConfig.write_key,
host=DefaultConfig.host,
debug=DefaultConfig.debug,
max_queue_size=DefaultConfig.max_queue_size,
send=DefaultConfig.send,
on_error=DefaultConfig.on_error,
gzip=DefaultConfig.gzip,
max_retries=DefaultConfig.max_retries,
sync_mode=DefaultConfig.sync_mode,
timeout=DefaultConfig.timeout,
proxies=DefaultConfig.proxies,
thread=DefaultConfig.thread,
upload_size=DefaultConfig.upload_size,
upload_interval=DefaultConfig.upload_interval,):
require('write_key', write_key, string_types)
self.queue = queue.Queue(max_queue_size)
self.write_key = write_key
self.on_error = on_error
self.debug = debug
self.send = send
self.sync_mode = sync_mode
self.host = host
self.gzip = gzip
self.timeout = timeout
self.proxies = proxies
if debug:
self.log.setLevel(logging.DEBUG)
if sync_mode:
self.consumers = None
else:
# On program exit, allow the consumer thread to exit cleanly.
# This prevents exceptions and a messy shutdown when the
# interpreter is destroyed before the daemon thread finishes
# execution. However, it is *not* the same as flushing the queue!
# To guarantee all messages have been delivered, you'll still need
# to call flush().
if send:
atexit.register(self.join)
for _ in range(thread):
self.consumers = []
consumer = Consumer(
self.queue, write_key, host=host, on_error=on_error,
upload_size=upload_size, upload_interval=upload_interval,
gzip=gzip, retries=max_retries, timeout=timeout,
proxies=proxies,
)
self.consumers.append(consumer)
# if we've disabled sending, just don't start the consumer
if send:
consumer.start()
def identify(self, user_id=None, traits=None, context=None, timestamp=None,
anonymous_id=None, integrations=None, message_id=None):
traits = traits or {}
context = context or {}
integrations = integrations or {}
require('user_id or anonymous_id', user_id or anonymous_id, ID_TYPES)
require('traits', traits, dict)
msg = {
'integrations': integrations,
'anonymousId': anonymous_id,
'timestamp': timestamp,
'context': context,
'type': 'identify',
'userId': user_id,
'traits': traits,
'messageId': message_id,
}
return self._enqueue(msg)
def track(self, user_id=None, event=None, properties=None, context=None,
timestamp=None, anonymous_id=None, integrations=None,
message_id=None):
properties = properties or {}
context = context or {}
integrations = integrations or {}
require('user_id or anonymous_id', user_id or anonymous_id, ID_TYPES)
require('properties', properties, dict)
require('event', event, string_types)
msg = {
'integrations': integrations,
'anonymousId': anonymous_id,
'properties': properties,
'timestamp': timestamp,
'context': context,
'userId': user_id,
'type': 'track',
'event': event,
'messageId': message_id,
}
return self._enqueue(msg)
def alias(self, previous_id=None, user_id=None, context=None,
timestamp=None, integrations=None, message_id=None):
context = context or {}
integrations = integrations or {}
require('previous_id', previous_id, ID_TYPES)
require('user_id', user_id, ID_TYPES)
msg = {
'integrations': integrations,
'previousId': previous_id,
'timestamp': timestamp,
'context': context,
'userId': user_id,
'type': 'alias',
'messageId': message_id,
}
return self._enqueue(msg)
def group(self, user_id=None, group_id=None, traits=None, context=None,
timestamp=None, anonymous_id=None, integrations=None,
message_id=None):
traits = traits or {}
context = context or {}
integrations = integrations or {}
require('user_id or anonymous_id', user_id or anonymous_id, ID_TYPES)
require('group_id', group_id, ID_TYPES)
require('traits', traits, dict)
msg = {
'integrations': integrations,
'anonymousId': anonymous_id,
'timestamp': timestamp,
'groupId': group_id,
'context': context,
'userId': user_id,
'traits': traits,
'type': 'group',
'messageId': message_id,
}
return self._enqueue(msg)
def page(self, user_id=None, category=None, name=None, properties=None,
context=None, timestamp=None, anonymous_id=None,
integrations=None, message_id=None):
properties = properties or {}
context = context or {}
integrations = integrations or {}
require('user_id or anonymous_id', user_id or anonymous_id, ID_TYPES)
require('properties', properties, dict)
if name:
require('name', name, string_types)
if category:
require('category', category, string_types)
msg = {
'integrations': integrations,
'anonymousId': anonymous_id,
'properties': properties,
'timestamp': timestamp,
'category': category,
'context': context,
'userId': user_id,
'type': 'page',
'name': name,
'messageId': message_id,
}
return self._enqueue(msg)
def screen(self, user_id=None, category=None, name=None, properties=None,
context=None, timestamp=None, anonymous_id=None,
integrations=None, message_id=None):
properties = properties or {}
context = context or {}
integrations = integrations or {}
require('user_id or anonymous_id', user_id or anonymous_id, ID_TYPES)
require('properties', properties, dict)
if name:
require('name', name, string_types)
if category:
require('category', category, string_types)
msg = {
'integrations': integrations,
'anonymousId': anonymous_id,
'properties': properties,
'timestamp': timestamp,
'category': category,
'context': context,
'userId': user_id,
'type': 'screen',
'name': name,
'messageId': message_id,
}
return self._enqueue(msg)
def _enqueue(self, msg):
"""Push a new `msg` onto the queue, return `(success, msg)`"""
timestamp = msg['timestamp']
if timestamp is None:
timestamp = datetime.utcnow().replace(tzinfo=tzutc())
message_id = msg.get('messageId')
if message_id is None:
message_id = uuid4()
require('integrations', msg['integrations'], dict)
require('type', msg['type'], string_types)
require('timestamp', timestamp, datetime)
require('context', msg['context'], dict)
# add common
timestamp = guess_timezone(timestamp)
msg['timestamp'] = timestamp.isoformat()
msg['messageId'] = stringify_id(message_id)
msg['context']['library'] = {
'name': 'analytics-python',
'version': VERSION
}
msg['userId'] = stringify_id(msg.get('userId', None))
msg['anonymousId'] = stringify_id(msg.get('anonymousId', None))
msg = clean(msg)
self.log.debug('queueing: %s', msg)
# if send is False, return msg as if it was successfully queued
if not self.send:
return True, msg
if self.sync_mode:
self.log.debug('enqueued with blocking %s.', msg['type'])
post(self.write_key, self.host, gzip=self.gzip,
timeout=self.timeout, proxies=self.proxies, batch=[msg])
return True, msg
try:
self.queue.put(msg, block=False)
self.log.debug('enqueued %s.', msg['type'])
return True, msg
except queue.Full:
self.log.warning('analytics-python queue is full')
return False, msg
def flush(self):
"""Forces a flush from the internal queue to the server"""
queue = self.queue
size = queue.qsize()
queue.join()
# Note that this message may not be precise, because of threading.
self.log.debug('successfully flushed about %s items.', size)
def join(self):
"""Ends the consumer thread once the queue is empty.
Blocks execution until finished
"""
for consumer in self.consumers:
consumer.pause()
try:
consumer.join()
except RuntimeError:
# consumer thread has not started
pass
def shutdown(self):
"""Flush all messages and cleanly shutdown the client"""
self.flush()
self.join()
def require(name, field, data_type):
"""Require that the named `field` has the right `data_type`"""
if not isinstance(field, data_type):
msg = '{0} must have {1}, got: {2}'.format(name, data_type, field)
raise AssertionError(msg)
def stringify_id(val):
if val is None:
return None
if isinstance(val, string_types):
return val
return str(val)