Spaces:
Running
Running
# -*- coding: utf-8 -*-
"""
    babel.plural
    ~~~~~~~~~~~~

    CLDR Plural support.  See UTS #35.

    :copyright: (c) 2013-2021 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""
import re | |
from babel._compat import decimal | |
# Plural category tags accepted by `PluralRule`.  The tuple order is the order
# in which `PluralRule.__repr__` lists tags and `to_gettext` assigns indices.
_plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
# Tag returned by the generated functions when no explicit rule matches.
_fallback_tag = 'other'
def extract_operands(source):
    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

    The result is a 6-tuple (n, i, v, w, f, t), where those symbols are as follows:

    ====== ===============================================================
    Symbol Value
    ------ ---------------------------------------------------------------
    n      absolute value of the source number (integer and decimals).
    i      integer digits of n.
    v      number of visible fraction digits in n, with trailing zeros.
    w      number of visible fraction digits in n, without trailing zeros.
    f      visible fractional digits in n, with trailing zeros.
    t      visible fractional digits in n, without trailing zeros.
    ====== ===============================================================

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands

    >>> extract_operands(7)
    (7, 7, 0, 0, 0, 0)
    >>> extract_operands(0.05)[1:]
    (0, 2, 2, 5, 5)

    :param source: A real number
    :type source: int|float|decimal.Decimal
    :return: A n-i-v-w-f-t tuple.  ``n`` is an ``int`` for integers and
             integral floats and a ``decimal.Decimal`` otherwise.
    :rtype: tuple[decimal.Decimal|int, int, int, int, int, int]
    """
    n = abs(source)
    i = int(n)
    if isinstance(n, float):
        if i == n:
            # An integral float carries no visible fraction digits.
            n = i
        else:
            # Cast the `float` to a number via the string representation.
            # This is required for Python 2.6 anyway (it will straight out fail to
            # do the conversion otherwise), and it's highly unlikely that the user
            # actually wants the lossless conversion behavior (quoting the Python
            # documentation):
            # > If value is a float, the binary floating point value is losslessly
            # > converted to its exact decimal equivalent.
            # > This conversion can often require 53 or more digits of precision.
            # Should the user want that behavior, they can simply pass in a pre-
            # converted `Decimal` instance of desired accuracy.
            n = decimal.Decimal(str(n))

    if not isinstance(n, decimal.Decimal):
        # Plain integers have no fraction part at all.
        return n, i, 0, 0, 0, 0

    dec_tuple = n.as_tuple()
    exp = dec_tuple.exponent
    if exp < 0:
        # The coefficient may hold fewer digits than the exponent implies
        # (``Decimal('0.05')`` is digits ``(5,)`` with exponent ``-2``), so
        # the sliced digits are left-padded with zeros to the full width of
        # the fraction.  Without the padding the leading zeros are lost and
        # ``v``/``w`` come out too small.
        trailing = ''.join(str(d) for d in dec_tuple.digits[exp:])
        trailing = trailing.rjust(-exp, '0')
    else:
        trailing = ''
    no_trailing = trailing.rstrip('0')
    v = len(trailing)
    w = len(no_trailing)
    f = int(trailing or 0)
    t = int(no_trailing or 0)
    return n, i, v, w, f, t
class PluralRule(object):
    """Represents a set of language pluralization rules.  The constructor
    accepts a list of (tag, expr) tuples or a dict of `CLDR rules`_. The
    resulting object is callable and accepts one parameter with a positive or
    negative number (both integer and float) for the number that indicates the
    plural form for a string and returns the tag for the format:

    >>> rule = PluralRule({'one': 'n is 1'})
    >>> rule(1)
    'one'
    >>> rule(2)
    'other'

    Currently the CLDR defines these tags: zero, one, two, few, many and
    other where other is an implicit default.  Rules should be mutually
    exclusive; for a given numeric value, only one rule should apply (i.e.
    the condition should only be true for one of the plural rule elements).

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
    """

    # ``abstract`` holds the parsed ``(tag, ast)`` pairs; ``_func`` caches the
    # Python function compiled lazily on the first call.
    __slots__ = ('abstract', '_func')

    def __init__(self, rules):
        """Initialize the rule instance.

        :param rules: a list of ``(tag, expr)`` tuples with the rules
                      conforming to UTS #35 or a dict with the tags as keys
                      and expressions as values.
        :raise RuleError: if the expression is malformed
        :raise ValueError: if a tag is unknown or is defined more than once
        """
        if isinstance(rules, dict):
            rules = rules.items()
        found = set()
        self.abstract = []
        for key, expr in sorted(rules):
            if key not in _plural_tags:
                raise ValueError('unknown tag %r' % key)
            elif key in found:
                raise ValueError('tag %r defined twice' % key)
            found.add(key)
            ast = _Parser(expr).ast
            # A rule consisting only of samples parses to ``None`` and
            # contributes no condition.
            if ast:
                self.abstract.append((key, ast))

    def __repr__(self):
        rules = self.rules
        return '<%s %r>' % (
            type(self).__name__,
            ', '.join(['%s: %s' % (tag, rules[tag]) for tag in _plural_tags
                       if tag in rules])
        )

    @classmethod
    def parse(cls, rules):
        """Create a `PluralRule` instance for the given rules.  If the rules
        are a `PluralRule` object, that object is returned.

        :param rules: the rules as list or dict, or a `PluralRule` object
        :raise RuleError: if the expression is malformed
        """
        if isinstance(rules, cls):
            return rules
        return cls(rules)

    @property
    def rules(self):
        """The `PluralRule` as a dict of unicode plural rules.

        >>> rule = PluralRule({'one': 'n is 1'})
        >>> rule.rules
        {'one': 'n is 1'}
        """
        _compile = _UnicodeCompiler().compile
        return dict([(tag, _compile(ast)) for tag, ast in self.abstract])

    @property
    def tags(self):
        """A set of explicitly defined tags in this rule.  The implicit default
        ``'other'`` rules is not part of this set unless there is an explicit
        rule for it."""
        return frozenset([i[0] for i in self.abstract])

    def __getstate__(self):
        # Only the parsed rules are pickled; the compiled function is rebuilt
        # on demand by ``__call__``.
        return self.abstract

    def __setstate__(self, abstract):
        self.abstract = abstract

    def __call__(self, n):
        # ``_func`` is a slot that stays unset until the first call.
        if not hasattr(self, '_func'):
            self._func = to_python(self)
        return self._func(n)
def to_javascript(rule):
    """Render rules as a self-contained JavaScript function expression.

    Accepts a list/dict of rules or a `PluralRule` object.  The generated
    function depends on no external library:

    >>> to_javascript({'one': 'n is 1'})
    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

    Implementation detail: The function generated will probably evaluate
    expressions involved into range operations multiple times.  This has the
    advantage that external helper functions are not required and is not a
    big performance hit for these simple calculations.

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    js_compiler = _JavaScriptCompiler()
    # One "<condition> ? '<tag>' : " link per rule; the fallback tag closes
    # the ternary chain.
    branches = ''.join(
        '%s ? %r : ' % (js_compiler.compile(ast), tag)
        for tag, ast in PluralRule.parse(rule).abstract
    )
    return '(function(n) { return ' + branches + ('%r; })' % _fallback_tag)
def to_python(rule):
    """Convert a list/dict of rules or a `PluralRule` object into a regular
    Python function.  This is useful in situations where you need a real
    function and don't care about the actual rule object:

    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
    >>> func(1)
    'one'
    >>> func(3)
    'few'
    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
    >>> func(11)
    'one'
    >>> func(15)
    'few'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    # Names the generated source is allowed to call.
    namespace = dict(
        IN=in_range_list,
        WITHIN=within_range_list,
        MOD=cldr_modulo,
        extract_operands=extract_operands,
    )
    py_compiler = _PythonCompiler()
    lines = [
        'def evaluate(n):',
        ' n, i, v, w, f, t = extract_operands(n)',
    ]
    # The str() call coerces the tag to the native string type.  Tags come
    # from a small ASCII-only set, so the coercion is safe.
    lines.extend(
        ' if (%s): return %r' % (py_compiler.compile(ast), str(tag))
        for tag, ast in PluralRule.parse(rule).abstract
    )
    lines.append(' return %r' % _fallback_tag)
    source = '\n'.join(lines)
    # Running the 'exec'-mode code object defines ``evaluate`` in `namespace`.
    eval(compile(source, '<rule>', 'exec'), namespace)
    return namespace['evaluate']
def to_gettext(rule):
    """The plural rule as gettext expression.  The gettext expression is
    technically limited to integers and returns indices rather than tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2)'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    rule = PluralRule.parse(rule)
    used_tags = rule.tags | {_fallback_tag}
    gettext_compiler = _GettextCompiler()
    # A tag's index is its position among the used tags, in `_plural_tags` order.
    ordered_tags = [tag for tag in _plural_tags if tag in used_tags]
    header = 'nplurals=%d; plural=(' % len(used_tags)
    branches = ''.join(
        '%s ? %d : ' % (gettext_compiler.compile(ast), ordered_tags.index(tag))
        for tag, ast in rule.abstract
    )
    return header + branches + ('%d)' % ordered_tags.index(_fallback_tag))
def in_range_list(num, range_list):
    """Integer range list test.  This is the callback for the "in" operator
    of the UTS #35 pluralization rule language:

    >>> in_range_list(1, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3), (5, 8)])
    True
    >>> in_range_list(1.2, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4), (6, 8)])
    False
    """
    # Values with a fractional part never satisfy "in".
    if not num == int(num):
        return False
    return within_range_list(num, range_list)
def within_range_list(num, range_list):
    """Float range test.  This is the callback for the "within" operator
    of the UTS #35 pluralization rule language:

    >>> within_range_list(1, [(1, 3)])
    True
    >>> within_range_list(1.0, [(1, 3)])
    True
    >>> within_range_list(1.2, [(1, 4)])
    True
    >>> within_range_list(8.8, [(1, 4), (7, 15)])
    True
    >>> within_range_list(10, [(1, 4)])
    False
    >>> within_range_list(10.5, [(1, 4), (20, 30)])
    False
    """
    # Both bounds of every (low, high) pair are inclusive.
    for low, high in range_list:
        if num >= low and num <= high:
            return True
    return False
def cldr_modulo(a, b):
    """Javaish modulo.  This modulo operator returns the value with the sign
    of the dividend rather than the divisor like Python does:

    >>> cldr_modulo(-3, 5)
    -3
    >>> cldr_modulo(-3, -5)
    -3
    >>> cldr_modulo(3, 5)
    3
    """
    negative_dividend = a < 0
    # Work on magnitudes, then restore the dividend's sign on the result.
    dividend = a * -1 if negative_dividend else a
    divisor = b * -1 if b < 0 else b
    remainder = dividend % divisor
    return remainder * -1 if negative_dividend else remainder
class RuleError(Exception):
    """Signals a malformed pluralization rule.

    Raised by the rule tokenizer and parser in this module.
    """
# Single-letter operand names accepted as identifiers in rule expressions.
_VARS = 'nivwft'

# Tokenizer table of (token type, pattern) pairs.  `tokenize_rule` tries the
# patterns in this order at each position and takes the first match; matches
# whose token type is None (whitespace) are consumed but not emitted.
_RULES = [
    (None, re.compile(r'\s+', re.UNICODE)),
    ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[{0}])\b'
                        .format(_VARS))),
    ('value', re.compile(r'\d+')),
    ('symbol', re.compile(r'%|,|!=|=')),
    ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE))  # U+2026: ELLIPSIS
]
def tokenize_rule(s):
    """Split a rule string into ``(type, text)`` tokens.

    Everything from the first ``'@'`` onwards (the samples) is discarded.
    The tokens are returned in reverse order so that callers can consume the
    next token with ``list.pop()``.

    :param s: the rule expression
    :return: list of ``(type, text)`` tuples, last token first
    :raise RuleError: if no token pattern matches at some position
    """
    s = s.split('@')[0]
    tokens = []
    pos, end = 0, len(s)
    while pos < end:
        for tok, rule in _RULES:
            match = rule.match(s, pos)
            if match is None:
                continue
            pos = match.end()
            # Patterns without a token type are skipped silently.
            if tok:
                tokens.append((tok, match.group()))
            break
        else:
            raise RuleError('malformed CLDR pluralization rule.  '
                            'Got unexpected %r' % s[pos])
    tokens.reverse()
    return tokens
def test_next_token(tokens, type_, value=None):
    """Check whether the next token (the last list item) matches.

    :param tokens: reversed token list as produced by `tokenize_rule`
    :param type_: required token type
    :param value: required token text, or ``None`` to accept any text
    :return: a truthy value on a match; the empty `tokens` list itself or
             ``False`` otherwise
    """
    if not tokens:
        # Hand back the (falsy) empty list, exactly like the `and` chain did.
        return tokens
    upcoming = tokens[-1]
    if not upcoming[0] == type_:
        return False
    return value is None or upcoming[1] == value
def skip_token(tokens, type_, value=None):
    """Consume and return the next token if it matches, else return ``None``.

    :param tokens: reversed token list; a matching token is popped off its end
    :param type_: required token type
    :param value: required token text, or ``None`` to accept any text
    """
    if not test_next_token(tokens, type_, value):
        return None
    return tokens.pop()
def value_node(value):
    """Build the AST node for a literal value."""
    args = (value,)
    return ('value', args)
def ident_node(name):
    """Build the AST node for an operand identifier; it takes no arguments."""
    return (name, tuple())
def range_list_node(range_list):
    """Build the AST node wrapping a list of ``(low, high)`` range pairs."""
    return ('range_list', range_list)
def negate(rv):
    """Wrap an AST node in a ``'not'`` node."""
    operand = (rv,)
    return ('not', operand)
class _Parser(object):
    """Internal parser.  This class can translate a single rule into an abstract
    tree of tuples.  It implements the following grammar::

        condition     = and_condition ('or' and_condition)*
                        ('@integer' samples)?
                        ('@decimal' samples)?
        and_condition = relation ('and' relation)*
        relation      = is_relation | in_relation | within_relation
        is_relation   = expr 'is' ('not')? value
        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
        within_relation = expr ('not')? 'within' range_list
        expr          = operand (('mod' | '%') value)?
        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
        range_list    = (range | value) (',' range_list)*
        value         = digit+
        digit         = 0|1|2|3|4|5|6|7|8|9
        range         = value'..'value
        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
        sampleRange   = decimalValue '~' decimalValue
        decimalValue  = value ('.' value)?

    - Whitespace can occur between or around any of the above tokens.
    - Rules should be mutually exclusive; for a given numeric value, only one
      rule should apply (i.e. the condition should only be true for one of
      the plural rule elements).
    - The in and within relations can take comma-separated lists, such as:
      'n in 3,5,7..15'.
    - Samples are ignored.

    The translator parses the expression on instantiation into an attribute
    called `ast`.
    """

    def __init__(self, string):
        """Tokenize and parse `string`; the result is stored in ``self.ast``.

        :raise RuleError: if the rule is malformed or has trailing tokens
        """
        self.tokens = tokenize_rule(string)
        if self.tokens:
            self.ast = self.condition()
            if self.tokens:
                raise RuleError('Expected end of rule, got %r' %
                                self.tokens[-1][1])
        else:
            # If the pattern is only samples, it's entirely possible
            # no stream of tokens whatsoever is generated.
            self.ast = None

    def expect(self, type_, value=None, term=None):
        """Consume and return the next token, or raise `RuleError` if it does
        not match.  `term` overrides how the expected token is named in the
        error message.
        """
        token = skip_token(self.tokens, type_, value)
        if token is not None:
            return token
        if term is None:
            # Name the expected text when given, otherwise the token type.
            wanted = value if value is not None else (type_ or None)
            term = repr(wanted)
        if self.tokens:
            raise RuleError('expected %s but got %r' %
                            (term, self.tokens[-1][1]))
        raise RuleError('expected %s but end of rule reached' % term)

    def condition(self):
        """Parse ``and_condition ('or' and_condition)*``."""
        node = self.and_condition()
        while skip_token(self.tokens, 'word', 'or'):
            node = ('or', (node, self.and_condition()))
        return node

    def and_condition(self):
        """Parse ``relation ('and' relation)*``."""
        node = self.relation()
        while skip_token(self.tokens, 'word', 'and'):
            node = ('and', (node, self.relation()))
        return node

    def relation(self):
        """Parse an ``is``, ``in``, ``within`` or operator-based relation."""
        left = self.expr()
        if skip_token(self.tokens, 'word', 'is'):
            if skip_token(self.tokens, 'word', 'not'):
                op = 'isnot'
            else:
                op = 'is'
            return op, (left, self.value())
        negated = skip_token(self.tokens, 'word', 'not')
        if skip_token(self.tokens, 'word', 'within'):
            method = 'within'
        elif skip_token(self.tokens, 'word', 'in'):
            method = 'in'
        else:
            # Neither legacy keyword follows: must be the "=" / "!=" syntax,
            # which cannot be combined with a leading 'not'.
            if negated:
                raise RuleError('Cannot negate operator based rules.')
            return self.newfangled_relation(left)
        rv = ('relation', (method, left, self.range_list()))
        if negated:
            return negate(rv)
        return rv

    def newfangled_relation(self, left):
        """Parse the ``=`` / ``!=`` form of an ``in`` relation."""
        negated = False
        if not skip_token(self.tokens, 'symbol', '='):
            if not skip_token(self.tokens, 'symbol', '!='):
                raise RuleError('Expected "=" or "!=" or legacy relation')
            negated = True
        rv = ('relation', ('in', left, self.range_list()))
        if negated:
            return negate(rv)
        return rv

    def range_or_value(self):
        """Parse ``value`` or ``value..value`` into a ``(low, high)`` pair;
        a single value yields the same node for both ends.
        """
        low = self.value()
        high = self.value() if skip_token(self.tokens, 'ellipsis') else low
        return low, high

    def range_list(self):
        """Parse a comma-separated list of ranges/values."""
        items = [self.range_or_value()]
        while skip_token(self.tokens, 'symbol', ','):
            items.append(self.range_or_value())
        return range_list_node(items)

    def expr(self):
        """Parse an operand with an optional ``mod``/``%`` suffix."""
        word = skip_token(self.tokens, 'word')
        if word is None or word[1] not in _VARS:
            raise RuleError('Expected identifier variable')
        name = word[1]
        has_modulus = (skip_token(self.tokens, 'word', 'mod') or
                       skip_token(self.tokens, 'symbol', '%'))
        if has_modulus:
            return 'mod', (ident_node(name), self.value())
        return ident_node(name)

    def value(self):
        """Parse an integer literal into a value node."""
        token = self.expect('value')
        return value_node(int(token[1]))
def _binary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles both operands and substitutes them into
    the two-placeholder template `tmpl`.
    """
    def compile_binary(self, left, right):
        compiled = (self.compile(left), self.compile(right))
        return tmpl % compiled
    return compile_binary
def _unary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles its single operand and substitutes it
    into the one-placeholder template `tmpl`.
    """
    def compile_unary(self, operand):
        return tmpl % self.compile(operand)
    return compile_unary
def compile_zero(x):
    """Compile any operand to the constant ``'0'``; the argument is ignored."""
    return '0'
class _Compiler(object):
    """The compilers are able to transform the expressions into multiple
    output formats.
    """

    def compile(self, arg):
        """Dispatch an ``(op, args)`` node to the ``compile_<op>`` method."""
        op, args = arg
        handler = getattr(self, 'compile_' + op)
        return handler(*args)

    # Operands compile to their own name by default.
    def compile_n(self):
        return 'n'

    def compile_i(self):
        return 'i'

    def compile_v(self):
        return 'v'

    def compile_w(self):
        return 'w'

    def compile_f(self):
        return 'f'

    def compile_t(self):
        return 't'

    def compile_value(self, v):
        return str(v)

    # C-style operators shared by the compilers that do not override them.
    compile_and = _binary_compiler('(%s && %s)')
    compile_or = _binary_compiler('(%s || %s)')
    compile_not = _unary_compiler('(!%s)')
    compile_mod = _binary_compiler('(%s %% %s)')
    compile_is = _binary_compiler('(%s == %s)')
    compile_isnot = _binary_compiler('(%s != %s)')

    def compile_relation(self, method, expr, range_list):
        """Compile an ``in``/``within`` relation; subclasses must override."""
        raise NotImplementedError()
class _PythonCompiler(_Compiler):
    """Compiles an expression to Python."""

    compile_and = _binary_compiler('(%s and %s)')
    compile_or = _binary_compiler('(%s or %s)')
    compile_not = _unary_compiler('(not %s)')
    compile_mod = _binary_compiler('MOD(%s, %s)')

    def compile_relation(self, method, expr, range_list):
        """Emit ``IN(expr, [...])`` or ``WITHIN(expr, [...])`` with the
        ranges rendered as a list of ``(low, high)`` tuples.
        """
        pairs = ','.join(
            '(%s, %s)' % (self.compile(low), self.compile(high))
            for low, high in range_list[1]
        )
        return '%s(%s, [%s])' % (method.upper(), self.compile(expr), pairs)
class _GettextCompiler(_Compiler):
    """Compile into a gettext plural expression."""

    # ``i`` compiles like ``n``; the fraction operands compile to zero.
    compile_i = _Compiler.compile_n
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Expand the range list into equality and bounds checks joined
        with ``||``.
        """
        subject = self.compile(expr)
        clauses = []
        for low, high in range_list[1]:
            if low == high:
                # Single value: a plain equality test suffices.
                clause = '(%s == %s)' % (subject, self.compile(low))
            else:
                clause = '(%s >= %s && %s <= %s)' % (
                    subject, self.compile(low),
                    subject, self.compile(high),
                )
            clauses.append(clause)
        return '(%s)' % ' || '.join(clauses)
class _JavaScriptCompiler(_GettextCompiler):
    """Compiles the expression to plain JavaScript."""

    # XXX: presently javascript does not support any of the
    # fraction support and basically only deals with integers.
    def compile_i(self):
        return 'parseInt(n, 10)'

    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Compile like gettext, adding an integer check for ``in``."""
        code = _GettextCompiler.compile_relation(
            self, method, expr, range_list)
        if method != 'in':
            return code
        # "in" only holds for integers, so require parseInt(x) == x as well.
        subject = self.compile(expr)
        return '(parseInt(%s, 10) == %s && %s)' % (subject, subject, code)
class _UnicodeCompiler(_Compiler):
    """Returns a unicode pluralization rule again."""

    # XXX: this currently spits out the old syntax instead of the new
    # one.  We can change that, but it will break a whole bunch of stuff
    # for users I suppose.

    compile_is = _binary_compiler('%s is %s')
    compile_isnot = _binary_compiler('%s is not %s')
    compile_and = _binary_compiler('%s and %s')
    compile_or = _binary_compiler('%s or %s')
    compile_mod = _binary_compiler('%s mod %s')

    def compile_not(self, relation):
        """Render a negated relation node as ``<expr> not <method> <ranges>``."""
        method, expr, range_list = relation[1]
        return self.compile_relation(method, expr, range_list, negated=True)

    def compile_relation(self, method, expr, range_list, negated=False):
        """Render a relation in the legacy keyword syntax."""
        pieces = []
        for low, high in range_list[1]:
            if low == high:
                pieces.append(self.compile(low))
            else:
                pieces.append('%s..%s' % (self.compile(low),
                                          self.compile(high)))
        keyword = ' not' if negated else ''
        return '%s%s %s %s' % (
            self.compile(expr), keyword, method, ','.join(pieces)
        )