# -*- coding: utf-8 -*-

# Copyright (c) 2013, Mahmoud Hashemi
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#    * Redistributions of source code must retain the above copyright
#      notice, this list of conditions and the following disclaimer.
#
#    * Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials provided
#      with the distribution.
#
#    * The names of the contributors may not be used to endorse or
#      promote products derived from this software without specific
#      prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""`PEP 3101`_ introduced the :meth:`str.format` method, and what
would later be called "new-style" string formatting. For the sake of
explicit correctness, it is probably best to refer to Python's dual
string formatting capabilities as *bracket-style* and
*percent-style*. There is overlap, but one does not replace the
other.

  * Bracket-style is more pluggable, slower, and uses a method.
  * Percent-style is simpler, faster, and uses an operator.

Bracket-style formatting brought with it a much more powerful toolbox,
but it was far from a full one. :meth:`str.format` uses `more powerful
syntax`_, but `the tools and idioms`_ for working with
that syntax are not well-developed nor well-advertised.

``formatutils`` adds several functions for working with bracket-style
format strings:

  * :class:`DeferredValue`: Defer fetching or calculating a value
    until format time.
  * :func:`get_format_args`: Parse the positional and keyword
    arguments out of a format string.
  * :func:`tokenize_format_str`: Tokenize a format string into
    literals and :class:`BaseFormatField` objects.
  * :func:`construct_format_field_str`: Assists in programmatic
    construction of format strings.
  * :func:`infer_positional_format_args`: Converts anonymous
    references in 2.7+ format strings to explicit positional arguments
    suitable for usage with Python 2.6.

.. _more powerful syntax: https://docs.python.org/2/library/string.html#format-string-syntax
.. _the tools and idioms: https://docs.python.org/2/library/string.html#string-formatting
.. _PEP 3101: https://www.python.org/dev/peps/pep-3101/
"""
# TODO: also include percent-formatting utils?
# TODO: include lithoxyl.formatters.Formatter (or some adaptation)?

from __future__ import print_function

import re
from string import Formatter

# Python 2/3 compatibility: make the name ``unicode`` available on
# Python 3, where the builtin no longer exists.
try:
    unicode  # Python 2
except NameError:
    unicode = str  # Python 3


__all__ = ['DeferredValue', 'get_format_args', 'tokenize_format_str',
           'construct_format_field_str', 'infer_positional_format_args',
           'BaseFormatField']


# Three alternatives, tried in order at each position: an escaped
# open-brace, an escaped close-brace, or an open-brace immediately
# followed by one of ``: ! . [ }``, i.e. a field with no name or index.
_pos_farg_re = re.compile('({{)|'  # escaped open-brace
                          '(}})|'  # escaped close-brace
                          r'({[:!.\[}])')  # anon positional format arg

def construct_format_field_str(fname, fspec, conv):
    """
    Constructs a format field string from the field name, spec, and
    conversion character (``fname``, ``fspec``, ``conv``). See Python
    String Formatting for more info.

    The pieces are assembled as ``{fname!conv:fspec}``; the ``!conv``
    and ``:fspec`` parts are left out when the corresponding argument
    is empty or ``None``. If *fname* is ``None``, an empty string is
    returned. An empty-string *fname* is kept and yields an anonymous
    field such as ``{:d}``.

    >>> construct_format_field_str('name', '>10', 'r')
    '{name!r:>10}'
    >>> construct_format_field_str('name', '', None)
    '{name}'
    """
    if fname is None:
        return ''
    ret = '{' + fname
    if conv:
        ret += '!' + conv
    if fspec:
        ret += ':' + fspec
    ret += '}'
    return ret

def split_format_str(fstr):
    """Does very basic splitting of a format string. For full
    tokenization, see :func:`tokenize_format_str`.

    Returns a list of ``(literal, field_str)`` pairs, one per chunk
    yielded by :meth:`string.Formatter.parse`. ``literal`` is the
    literal text of the chunk and ``field_str`` is the field that
    follows it, rebuilt with :func:`construct_format_field_str`.
    ``field_str`` is ``None`` when the chunk has no field.
    """
    ret = []
    for lit, fname, fspec, conv in Formatter().parse(fstr):
        if fname is None:
            # Literal text with no field after it.
            ret.append((lit, None))
            continue
        field_str = construct_format_field_str(fname, fspec, conv)
        ret.append((lit, field_str))
    return ret

def infer_positional_format_args(fstr):
    """Takes format strings with anonymous positional arguments, (e.g.,
    "{}" and {:d}), and converts them into numbered ones for explicitness and
    compatibility with 2.6.

    Anonymous fields are numbered from 0 in the order they appear.
    Escaped braces (``{{`` and ``}}``) and all other text are copied
    through unchanged.

    Returns a string with the inferred positional arguments.
    """
    # TODO: memoize
    pieces = []     # output fragments, joined once at the end
    max_anon = 0    # index to give the next anonymous field
    prev_end = 0    # end offset of the previous regex match

    # look for {: or {! or {. or {[ or {}
    for match in _pos_farg_re.finditer(fstr):
        start, end, group = match.start(), match.end(), match.group()
        if prev_end < start:
            # Copy the text between the previous match and this one.
            pieces.append(fstr[prev_end:start])
        prev_end = end
        if group == '{{' or group == '}}':
            # Escaped brace: keep as-is, it is not a field.
            pieces.append(group)
            continue
        # Insert the index between the open-brace and whatever followed it.
        pieces.append('{%s%s' % (max_anon, group[1:]))
        max_anon += 1
    pieces.append(fstr[prev_end:])
    return ''.join(pieces)

# This approach is hardly exhaustive but it works for most builtins
_INTCHARS = 'bcdoxXn'
_FLOATCHARS = 'eEfFgGn%'
# Maps a presentation-type character to its nominal Python type. 'n' is
# in both strings; the float entries are added last, so 'n' maps to float.
_TYPE_MAP = dict([(x, int) for x in _INTCHARS] +
                 [(x, float) for x in _FLOATCHARS])
_TYPE_MAP['s'] = str


def get_format_args(fstr):
    """
    Turn a format string into two lists of arguments referenced by the
    format string. One is positional arguments, and the other is named
    arguments. Each element of the list includes the name and the
    nominal type of the field.

    The nominal type is looked up from the last character of the
    field's format spec and defaults to ``str``. Fields referenced
    inside another field's format spec (e.g. ``width`` in
    ``{x:{width}d}``) are reported too, with type ``str``. Each name
    is reported once, at its first occurrence.

    Raises:
        ValueError: if a field uses attribute or index access
            (``{a.b}``, ``{a[0]}``), or is an anonymous positional
            field (``{}``).

    >>> args, kwargs = get_format_args("{noun} is {1:d} years old{punct}")
    >>> args == [(1, int)]
    True
    >>> kwargs == [('noun', str), ('punct', str)]
    True
    """
    # TODO: memoize
    formatter = Formatter()
    fargs, fkwargs, _dedup = [], [], set()

    def _add_arg(argname, type_char='s'):
        # Record each name once; integer-like names are positional.
        if argname not in _dedup:
            _dedup.add(argname)
            argtype = _TYPE_MAP.get(type_char, str)  # TODO: unicode
            try:
                fargs.append((int(argname), argtype))
            except ValueError:
                fkwargs.append((argname, argtype))

    for lit, fname, fspec, conv in formatter.parse(fstr):
        if fname is not None:
            type_char = fspec[-1:]
            fname_list = re.split('[.[]', fname)
            if len(fname_list) > 1:
                raise ValueError('encountered compound format arg: %r' % fname)
            # Explicit check rather than ``assert``, which is stripped
            # under ``python -O`` and would let an empty name through.
            if not fname_list[0]:
                raise ValueError('encountered anonymous positional argument')
            _add_arg(fname, type_char)
            for sublit, subfname, _, _ in formatter.parse(fspec):
                # TODO: positional and anon args not allowed here.
                if subfname is not None:
                    _add_arg(subfname)
    return fargs, fkwargs

def tokenize_format_str(fstr, resolve_pos=True):
    """Takes a format string, turns it into a list of alternating string
    literals and :class:`BaseFormatField` tokens. By default, also
    infers anonymous positional references into explicit, numbered
    positional references. To disable this behavior set *resolve_pos*
    to ``False``.

    Empty literals are omitted from the result, so two adjacent fields
    produce two consecutive :class:`BaseFormatField` tokens.
    """
    ret = []
    if resolve_pos:
        fstr = infer_positional_format_args(fstr)
    formatter = Formatter()
    for lit, fname, fspec, conv in formatter.parse(fstr):
        if lit:
            ret.append(lit)
        if fname is None:
            # Chunk was literal text only; there is no field to emit.
            continue
        ret.append(BaseFormatField(fname, fspec, conv))
    return ret

class BaseFormatField(object):
    """A class representing a reference to an argument inside of a
    bracket-style format string. For instance, in ``"{greeting},
    world!"``, there is a field named "greeting".

    These fields can have many options applied to them. See the
    Python docs on `Format String Syntax`_ for the full details.

    .. _Format String Syntax: https://docs.python.org/2/library/string.html#string-formatting
    """
    def __init__(self, fname, fspec='', conv=None):
        self.set_fname(fname)
        self.set_fspec(fspec)
        self.set_conv(conv)

    def set_fname(self, fname):
        "Set the field name."
        # Split on "." and "[" to separate the base name from any
        # attribute/index path. NOTE: index segments keep their closing
        # "]" (e.g. "a[0]" gives subpath ["0]"]).
        path_list = re.split('[.[]', fname)  # TODO
        self.base_name = path_list[0]
        self.fname = fname
        self.subpath = path_list[1:]
        # An empty or all-digit base name means a positional reference.
        self.is_positional = not self.base_name or self.base_name.isdigit()

    def set_fspec(self, fspec):
        "Set the field spec."
        fspec = fspec or ''
        # Collect the names of fields nested inside the spec itself,
        # e.g. "width" in "{x:{width}d}".
        subfields = []
        for sublit, subfname, _, _ in Formatter().parse(fspec):
            if subfname is not None:
                subfields.append(subfname)
        self.subfields = subfields
        self.fspec = fspec
        # The last character of the spec selects the nominal type;
        # anything not in _TYPE_MAP (including an empty spec) is str.
        self.type_char = fspec[-1:]
        self.type_func = _TYPE_MAP.get(self.type_char, str)

    def set_conv(self, conv):
        """There are only two built-in converters: ``s`` and ``r``. They are
        somewhat rare and appear like ``"{ref!r}"``."""
        # TODO
        self.conv = conv
        self.conv_func = None  # TODO

    def fstr(self):
        "The current state of the field in string format."
        return construct_format_field_str(self.fname, self.fspec, self.conv)

    def __repr__(self):
        cn = self.__class__.__name__
        args = [self.fname]
        # Show only as many constructor arguments as are needed: conv
        # is the third positional argument, so fspec must accompany it.
        if self.conv is not None:
            args.extend([self.fspec, self.conv])
        elif self.fspec != '':
            args.append(self.fspec)
        args_repr = ', '.join([repr(a) for a in args])
        return '%s(%s)' % (cn, args_repr)

    def __str__(self):
        # fstr is a plain method here, so it must be called; returning
        # the bound method made str(field) raise TypeError.
        return self.fstr()

# Sentinel for "no value cached yet", so that ``None`` can be cached.
_UNSET = object()


class DeferredValue(object):
    """:class:`DeferredValue` is a wrapper type, used to defer computing
    values which would otherwise be expensive to stringify and
    format. This is most valuable in areas like logging, where one
    would not want to waste time formatting a value for a log message
    which will subsequently be filtered because the message's log
    level was DEBUG and the logger was set to only emit CRITICAL
    messages.

    The :class:`DeferredValue` is initialized with a callable that
    takes no arguments and returns the value, which can be of any
    type. By default DeferredValue only calls that callable once, and
    future references will get a cached value. This behavior can be
    disabled by setting *cache_value* to ``False``.

    Args:

        func (function): A callable that takes no arguments and
            computes the value being represented.
        cache_value (bool): Whether subsequent usages will call *func*
            again. Defaults to ``True``.

    >>> import sys
    >>> dv = DeferredValue(lambda: len(sys._current_frames()))
    >>> output = "works great in all {0} threads!".format(dv)

    PROTIP: To keep lines shorter, use: ``from formatutils import
    DeferredValue as DV``
    """
    def __init__(self, func, cache_value=True):
        self.func = func
        self.cache_value = cache_value
        self._value = _UNSET

    def get_value(self):
        """Computes, optionally caches, and returns the value of the
        *func*. If ``get_value()`` has been called before, a cached
        value may be returned depending on the *cache_value* option
        passed to the constructor.
        """
        if self.cache_value and self._value is not _UNSET:
            return self._value
        value = self.func()
        if self.cache_value:
            self._value = value
        return value

    def __int__(self):
        return int(self.get_value())

    def __float__(self):
        return float(self.get_value())

    def __str__(self):
        return str(self.get_value())

    def __unicode__(self):
        return unicode(self.get_value())

    def __repr__(self):
        return repr(self.get_value())

    def __format__(self, fmt):
        value = self.get_value()
        try:
            return value.__format__(fmt)
        except (ValueError, TypeError):
            # TODO: this may be overkill
            # The value rejected the spec: convert it to the type the
            # presentation character implies (str if unknown) and retry.
            pt = fmt[-1:]  # presentation type
            type_conv = _TYPE_MAP.get(pt, str)
            return type_conv(value).__format__(fmt)

# end formatutils.py