DrFetWartz's picture
Upload folder using huggingface_hub
ffaa9fc
# -*- coding: utf-8 -*-
# Copyright (c) 2013, Mahmoud Hashemi
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# * The names of the contributors may not be used to endorse or
# promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""As a programming ecosystem grows, so do the chances of runtime
variability.
Python boasts one of the widest deployments for a high-level
programming environment, making it a viable target for all manner of
applications. But with breadth comes variance, so it's important to
know what you're working with.
Some basic variations that are common among development machines:
* **Executable runtime**: CPython, PyPy, Jython, etc., plus build date and compiler
* **Language version**: 2.4, 2.5, 2.6, 2.7... 3.4, 3.5, 3.6
* **Host operating system**: Windows, OS X, Ubuntu, Debian, CentOS, RHEL, etc.
* **Features**: 64-bit, IPv6, Unicode character support (UCS-2/UCS-4)
* **Built-in library support**: OpenSSL, threading, SQLite, zlib
* **User environment**: umask, ulimit, working directory path
* **Machine info**: CPU count, hostname, filesystem encoding
See the full example profile below for more.
ecoutils was created to quantify that variability. ecoutils quickly
produces an information-dense description of critical runtime factors,
with minimal side effects. In short, ecoutils is like browser and user
agent analytics, but for Python environments.
Transmission and collection
---------------------------
The data is all JSON serializable, and is suitable for sending to a
central analytics server. An HTTP-backed service for this can be found
at: https://github.com/mahmoud/espymetrics/
Notable omissions
-----------------
Due to space constraints (and possibly latency constraints), the
following information is deemed not dense enough, and thus omitted:
* :data:`sys.path`
* full :mod:`sysconfig`
* environment variables (:data:`os.environ`)
Compatibility
-------------
So far ecoutils has been tested on Python 2.4, 2.5, 2.6, 2.7, 3.4,
3.5, and PyPy. Various versions have been tested on Ubuntu, Debian,
RHEL, OS X, FreeBSD, and Windows 7.
.. note:: Boltons typically only support back to Python 2.6, but due
to its nature, ecoutils extends backwards compatibility to Python
2.4 and 2.5.
Profile generation
------------------
Profiles are generated by :func:`ecoutils.get_profile`.
When run as a module, ecoutils will call :func:`~ecoutils.get_profile`
and print a profile in JSON format::
$ python -m boltons.ecoutils
{
"_eco_version": "1.0.0",
"cpu_count": 4,
"cwd": "/home/mahmoud/projects/boltons",
"fs_encoding": "UTF-8",
"guid": "6b139e7bbf5ad4ed8d4063bf6235b4d2",
"hostfqdn": "mahmoud-host",
"hostname": "mahmoud-host",
"linux_dist_name": "Ubuntu",
"linux_dist_version": "14.04",
"python": {
"argv": "boltons/ecoutils.py",
"bin": "/usr/bin/python",
"build_date": "Jun 22 2015 17:58:13",
"compiler": "GCC 4.8.2",
"features": {
"64bit": true,
"expat": "expat_2.1.0",
"ipv6": true,
"openssl": "OpenSSL 1.0.1f 6 Jan 2014",
"readline": true,
"sqlite": "3.8.2",
"threading": true,
"tkinter": "8.6",
"unicode_wide": true,
"zlib": "1.2.8"
},
"version": "2.7.6 (default, Jun 22 2015, 17:58:13) [GCC 4.8.2]",
"version_info": [
2,
7,
6,
"final",
0
]
},
"time_utc": "2016-05-24 07:59:40.473140",
"time_utc_offset": -8.0,
"ulimit_hard": 4096,
"ulimit_soft": 1024,
"umask": "002",
"uname": {
"machine": "x86_64",
"node": "mahmoud-host",
"processor": "x86_64",
"release": "3.13.0-85-generic",
"system": "Linux",
"version": "#129-Ubuntu SMP Thu Mar 17 20:50:15 UTC 2016"
},
"username": "mahmoud"
}
``pip install boltons`` and try it yourself!
"""
import re
import os
import sys
import time
import pprint
import random
import socket
import struct
import getpass
import datetime
import platform
ECO_VERSION = '1.0.1'  # see version history below

# True on Python 3 and newer.
PY_GT_2 = sys.version_info[0] > 2

# Prefer the operating system's entropy source for the instance GUID and
# record whether it could be set up; otherwise fall back to the default
# pseudo-random generator.
try:
    getrandbits = random.SystemRandom().getrandbits
    HAVE_URANDOM = True
except Exception:
    HAVE_URANDOM = False
    getrandbits = random.getrandbits

# 128-bit GUID just like a UUID, but backwards compatible to 2.4.
# Always exactly 32 lowercase hex digits. The value is formatted with %x
# rather than sliced out of hex(): Python 2 appends an "L" to long values
# while Python 3 does not, so chopping the last character there silently
# dropped a real digit, and hex() never pads leading zeros.
INSTANCE_ID = '%032x' % getrandbits(128)

# A pointer wider than 4 bytes means a 64-bit interpreter build.
IS_64BIT = struct.calcsize("P") > 4

# Wide (UCS-4) unicode builds report a maxunicode above the 16-bit range.
HAVE_UCS4 = getattr(sys, 'maxunicode', 0) > 65536
# ---------------------------------------------------------------------
# Optional standard-library feature probes.
#
# Every probe is deliberately forgiving: whatever goes wrong while
# importing or inspecting a module, the feature is simply reported as
# absent (False, '' or 0) instead of breaking the import of this module.
# ---------------------------------------------------------------------

try:
    import readline
except Exception:
    HAVE_READLINE = False
else:
    HAVE_READLINE = True

try:
    import sqlite3
    SQLITE_VERSION = sqlite3.sqlite_version
except Exception:
    # interpreters without the sqlite3 module end up here
    SQLITE_VERSION = ''

try:
    import ssl
    # Old ssl modules do not expose OPENSSL_VERSION. The fallback is a
    # conservative estimate for Python <2.6: the SSL module was added in
    # 2006, when 0.9.7 was standard.
    OPENSSL_VERSION = getattr(ssl, 'OPENSSL_VERSION', 'OpenSSL >0.8.0')
except Exception:
    OPENSSL_VERSION = ''

try:
    # the module was renamed between Python 2 and Python 3
    if PY_GT_2:
        import tkinter
    else:
        import Tkinter as tkinter
    TKINTER_VERSION = str(tkinter.TkVersion)
except Exception:
    TKINTER_VERSION = ''

try:
    import zlib
    ZLIB_VERSION = zlib.ZLIB_VERSION
except Exception:
    ZLIB_VERSION = ''

try:
    from xml.parsers import expat
    EXPAT_VERSION = expat.EXPAT_VERSION
except Exception:
    EXPAT_VERSION = ''

try:
    from multiprocessing import cpu_count
    CPU_COUNT = cpu_count()
except Exception:
    # 0 stands for "could not be determined"
    CPU_COUNT = 0

try:
    import threading
except Exception:
    HAVE_THREADING = False
else:
    HAVE_THREADING = True

HAVE_IPV6 = getattr(socket, 'has_ipv6', False)

try:
    from resource import getrlimit, RLIMIT_NOFILE
    RLIMIT_FDS_SOFT, RLIMIT_FDS_HARD = getrlimit(RLIMIT_NOFILE)
except Exception:
    # the resource module or the limit is unavailable on this platform
    RLIMIT_FDS_SOFT = RLIMIT_FDS_HARD = 0

# Captured once, when the module is imported: the UTC wall-clock time and
# the local (non-DST) offset from UTC in hours.
START_TIME_INFO = dict(time_utc=str(datetime.datetime.utcnow()),
                       time_utc_offset=-time.timezone / 3600.0)
def get_python_info():
    """Describe the running interpreter as a JSON-serializable dict.

    The result holds the shell-escaped command line (``argv``), the
    executable path (``bin``), the whitespace-normalized version string,
    the compiler and build date reported by :mod:`platform`, the
    ``version_info`` sequence as a list, and a ``features`` dict built
    from the module-level feature constants.
    """
    features = {
        'openssl': OPENSSL_VERSION,
        'expat': EXPAT_VERSION,
        'sqlite': SQLITE_VERSION,
        'tkinter': TKINTER_VERSION,
        'zlib': ZLIB_VERSION,
        'unicode_wide': HAVE_UCS4,
        'readline': HAVE_READLINE,
        '64bit': IS_64BIT,
        'ipv6': HAVE_IPV6,
        'threading': HAVE_THREADING,
        'urandom': HAVE_URANDOM,
    }
    return {
        'argv': _escape_shell_args(sys.argv),
        'bin': sys.executable,
        # compiler/build_date below are parsed out of this very string,
        # so the raw (whitespace-collapsed) version is sent along too in
        # case it ever turns out to be unparsable.
        'version': ' '.join(sys.version.split()),
        'compiler': platform.python_compiler(),
        'build_date': platform.python_build()[1],
        'version_info': list(sys.version_info),
        'features': features,
    }
def get_profile(**kwargs):
    """The main entrypoint to ecoutils. Calling this will return a
    JSON-serializable dictionary of information about the current
    process.

    It is very unlikely that the information returned will change
    during the lifetime of the process, and in most cases the majority
    of the information stays the same between runs as well.

    :func:`get_profile` takes one optional keyword argument, *scrub*,
    a :class:`bool` that, if True, blanks out identifiable
    information. This includes current working directory, hostname,
    Python executable path, command-line arguments, and
    username. Values are replaced with '-', but for compatibility keys
    remain in place.

    Raises :exc:`TypeError` if any keyword argument other than *scrub*
    is passed.
    """
    scrub = kwargs.pop('scrub', False)
    if kwargs:
        raise TypeError('unexpected keyword arguments: %r' % (kwargs.keys(),))
    ret = {}
    try:
        ret['username'] = getpass.getuser()
    except Exception:
        # best effort: the user name cannot always be determined
        ret['username'] = ''
    ret['guid'] = str(INSTANCE_ID)
    ret['hostname'] = socket.gethostname()
    ret['hostfqdn'] = socket.getfqdn()
    uname = platform.uname()
    ret['uname'] = {'system': uname[0],
                    'node': uname[1],
                    'release': uname[2],  # linux: kernel release
                    'version': uname[3],  # linux: kernel build string
                    'machine': uname[4],
                    'processor': uname[5]}
    try:
        linux_dist = platform.linux_distribution()
    except Exception:
        # not available on every platform / Python version
        linux_dist = ('', '', '')
    ret['linux_dist_name'] = linux_dist[0]
    ret['linux_dist_version'] = linux_dist[1]
    ret['cpu_count'] = CPU_COUNT

    ret['fs_encoding'] = sys.getfilesystemencoding()
    ret['ulimit_soft'] = RLIMIT_FDS_SOFT
    ret['ulimit_hard'] = RLIMIT_FDS_HARD
    ret['cwd'] = os.getcwd()

    # os.umask() can only report the mask by replacing it, so install a
    # throwaway value, keep what it returns (the real mask), and restore
    # that straight away. The throwaway mask is a restrictive one (077)
    # so nothing created in between ends up with wider permissions.
    current_umask = os.umask(int('077', 8))
    os.umask(current_umask)
    # %o renders plain octal digits on every Python version, unlike
    # oct(), which yields '0o22' on Python 3.
    ret['umask'] = '%03o' % current_umask

    ret['python'] = get_python_info()
    ret.update(START_TIME_INFO)
    ret['_eco_version'] = ECO_VERSION

    if scrub:
        # mask identifiable information
        ret['cwd'] = '-'
        ret['hostname'] = '-'
        ret['hostfqdn'] = '-'
        ret['python']['bin'] = '-'
        ret['python']['argv'] = '-'
        ret['uname']['node'] = '-'
        ret['username'] = '-'

    return ret
try:
    import json

    def dumps(val, indent):
        """Serialize *val* to JSON text with sorted keys.

        A falsy *indent* produces the compact single-line form; any
        other value is handed to :func:`json.dumps` as its *indent*.
        """
        json_options = {'sort_keys': True}
        if indent:
            json_options['indent'] = indent
        return json.dumps(val, **json_options)

except ImportError:
    _real_safe_repr = pprint._safe_repr

    def _fake_json_dumps(val, indent=2):
        # Never do this. It is a hack for interpreters that ship without
        # the json module: pprint's repr hook is swapped out temporarily
        # so that pformat() emits JSON-looking text.
        literals = {'None': 'null', 'True': 'true', 'False': 'false'}

        def _fake_safe_repr(*a, **kw):
            text, is_read, is_rec = _real_safe_repr(*a, **kw)
            # singletons get their JSON spelling
            text = literals.get(text, text)
            # drop the unicode prefix of single-quoted reprs
            if text.startswith("u'"):
                text = text[1:]
            # single-quoted reprs become double-quoted; embedded double
            # quotes are dropped rather than escaped
            if text.startswith("'"):
                inner = text[1:-1]
                inner = inner.replace('"', '').replace(r'\"', '')
                text = '"' + inner + '"'
            return text, is_read, is_rec

        pprint._safe_repr = _fake_safe_repr
        try:
            return pprint.pformat(val, indent=indent)
        finally:
            # always put the real implementation back
            pprint._safe_repr = _real_safe_repr

    def dumps(val, indent):
        """Serialize *val* to JSON-like text without the json module.

        With a falsy *indent* the pretty-printed output is folded onto a
        single line.
        """
        text = _fake_json_dumps(val, indent=indent)
        if indent:
            return text
        return re.sub(r'\n\s*', ' ', text)
def get_profile_json(indent=False):
    """Return the profile from :func:`get_profile` as JSON text.

    A truthy *indent* selects pretty-printed output with a two-space
    indent; otherwise the compact form is produced.
    """
    indent_width = 0
    if indent:
        indent_width = 2
    return dumps(get_profile(), indent_width)
def main():
    """Print the indented JSON profile of the current process."""
    profile_json = get_profile_json(indent=True)
    print(profile_json)
#############################################
# The shell escaping copied in from strutils
#############################################
def _escape_shell_args(args, sep=' ', style=None):
if not style:
if sys.platform == 'win32':
style = 'cmd'
else:
style = 'sh'
if style == 'sh':
return _args2sh(args, sep=sep)
elif style == 'cmd':
return _args2cmd(args, sep=sep)
raise ValueError("style expected one of 'cmd' or 'sh', not %r" % style)
_find_sh_unsafe = re.compile(r'[^a-zA-Z0-9_@%+=:,./-]').search
def _args2sh(args, sep=' '):
# see strutils
ret_list = []
for arg in args:
if not arg:
ret_list.append("''")
continue
if _find_sh_unsafe(arg) is None:
ret_list.append(arg)
continue
# use single quotes, and put single quotes into double quotes
# the string $'b is then quoted as '$'"'"'b'
ret_list.append("'" + arg.replace("'", "'\"'\"'") + "'")
return ' '.join(ret_list)
def _args2cmd(args, sep=' '):
# see strutils
result = []
needquote = False
for arg in args:
bs_buf = []
# Add a space to separate this argument from the others
if result:
result.append(' ')
needquote = (" " in arg) or ("\t" in arg) or not arg
if needquote:
result.append('"')
for c in arg:
if c == '\\':
# Don't know if we need to double yet.
bs_buf.append(c)
elif c == '"':
# Double backslashes.
result.append('\\' * len(bs_buf)*2)
bs_buf = []
result.append('\\"')
else:
# Normal char
if bs_buf:
result.extend(bs_buf)
bs_buf = []
result.append(c)
# Add remaining backslashes, if any.
if bs_buf:
result.extend(bs_buf)
if needquote:
result.extend(bs_buf)
result.append('"')
return ''.join(result)
############################
# End shell escaping code
############################
# Print the JSON profile when this file is executed as a script or via
# ``python -m``; nothing is printed on a plain import.
if __name__ == '__main__':
    main()
"""
ecoutils protocol version history
---------------------------------
The version is the ECO_VERSION module-level constant, and the
_eco_version key in the dictionary returned from ecoutils.get_profile().
1.0.1 - (boltons version 16.3.2+) Remove uuid dependency and add HAVE_URANDOM
1.0.0 - (boltons version 16.3.0-16.3.1) Initial release
"""