|
""" |
|
babel.numbers |
|
~~~~~~~~~~~~~ |
|
|
|
CLDR Plural support. See UTS #35. |
|
|
|
:copyright: (c) 2013-2024 by the Babel Team. |
|
:license: BSD, see LICENSE for more details. |
|
""" |
|
from __future__ import annotations |
|
|
|
import decimal |
|
import re |
|
from collections.abc import Iterable, Mapping |
|
from typing import TYPE_CHECKING, Any, Callable |
|
|
|
if TYPE_CHECKING: |
|
from typing_extensions import Literal |
|
|
|
_plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other') |
|
_fallback_tag = 'other' |
|
|
|
|
|
def extract_operands(source: float | decimal.Decimal) -> tuple[decimal.Decimal | int, int, int, int, int, int, Literal[0], Literal[0]]:
    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

    The result is an 8-tuple (n, i, v, w, f, t, c, e), where those symbols are as follows:

    ====== ===============================================================
    Symbol Value
    ------ ---------------------------------------------------------------
    n      absolute value of the source number (integer and decimals).
    i      integer digits of n.
    v      number of visible fraction digits in n, with trailing zeros.
    w      number of visible fraction digits in n, without trailing zeros.
    f      visible fractional digits in n, with trailing zeros.
    t      visible fractional digits in n, without trailing zeros.
    c      compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    e      currently, synonym for 'c'. however, may be redefined in the future.
    ====== ===============================================================

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-61/tr35-numbers.html#Operands

    >>> extract_operands(decimal.Decimal('1.20'))
    (Decimal('1.20'), 1, 2, 1, 20, 2, 0, 0)
    >>> extract_operands(decimal.Decimal('0.001'))
    (Decimal('0.001'), 0, 3, 3, 1, 1, 0, 0)

    :param source: A real number
    :type source: int|float|decimal.Decimal
    :return: A n-i-v-w-f-t-c-e tuple; ``c`` and ``e`` are always 0 here
    :rtype: tuple[decimal.Decimal, int, int, int, int, int, int, int]
    """
    n = abs(source)
    i = int(n)
    if isinstance(n, float):
        if i == n:
            # A float without a fractional part is treated as a plain int.
            n = i
        else:
            # Go through ``str`` so the Decimal carries the short repr digits
            # of the float (e.g. 1.1 -> Decimal('1.1')) instead of the exact
            # binary expansion that ``Decimal(float)`` would produce.
            n = decimal.Decimal(str(n))

    if isinstance(n, decimal.Decimal):
        dec_tuple = n.as_tuple()
        exp = dec_tuple.exponent
        if exp < 0:
            # ``digits`` holds the coefficient without leading zeros, so a
            # value such as 0.001 has digits == (1,) and exponent == -3.
            # Left-pad the fraction to ``-exp`` digits so that leading zeros
            # after the decimal point are counted as visible digits.
            fraction = ''.join(str(d) for d in dec_tuple.digits[exp:])
            trailing = fraction.zfill(-exp)
        else:
            trailing = ''
        no_trailing = trailing.rstrip('0')
        v = len(trailing)
        w = len(no_trailing)
        f = int(trailing or 0)
        t = int(no_trailing or 0)
    else:
        v = w = f = t = 0
    # Compact decimal exponents are not derived from the input; always 0.
    c = e = 0
    return n, i, v, w, f, t, c, e
|
|
|
|
|
class PluralRule:
    """A callable set of pluralization rules for one language.

    Construct it from a list of ``(tag, expr)`` tuples or from a dict of
    `CLDR rules`_.  Calling the instance with a number (positive or negative,
    integer or float) returns the plural tag that applies to it:

    >>> rule = PluralRule({'one': 'n is 1'})
    >>> rule(1)
    'one'
    >>> rule(2)
    'other'

    Currently the CLDR defines these tags: zero, one, two, few, many and
    other where other is an implicit default.  Rules should be mutually
    exclusive; for a given numeric value, only one rule should apply (i.e.
    the condition should only be true for one of the plural rule elements).

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
    """

    __slots__ = ('abstract', '_func')

    def __init__(self, rules: Mapping[str, str] | Iterable[tuple[str, str]]) -> None:
        """Parse the given rules and store their abstract syntax trees.

        :param rules: a list of ``(tag, expr)`` tuples with the rules
                      conforming to UTS #35 or a dict with the tags as keys
                      and expressions as values.
        :raise ValueError: if a tag is unknown or given more than once
        :raise RuleError: if the expression is malformed
        """
        pairs = rules.items() if isinstance(rules, Mapping) else rules
        seen = set()
        self.abstract: list[tuple[str, Any]] = []
        for tag, expression in sorted(pairs):
            if tag not in _plural_tags:
                raise ValueError(f"unknown tag {tag!r}")
            if tag in seen:
                raise ValueError(f"tag {tag!r} defined twice")
            seen.add(tag)
            tree = _Parser(expression).ast
            if not tree:
                # Nothing to evaluate for this tag (the parser produced no AST).
                continue
            self.abstract.append((tag, tree))

    def __repr__(self) -> str:
        compiled = self.rules
        listing = [
            f"{tag}: {compiled[tag]}"
            for tag in _plural_tags
            if tag in compiled
        ]
        args = ", ".join(listing)
        return f"<{type(self).__name__} {args!r}>"

    @classmethod
    def parse(cls, rules: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> PluralRule:
        """Return a `PluralRule` for ``rules``.

        A `PluralRule` instance is handed back unchanged; anything else is
        passed to the constructor.

        :param rules: the rules as list or dict, or a `PluralRule` object
        :raise RuleError: if the expression is malformed
        """
        if not isinstance(rules, PluralRule):
            return cls(rules)
        return rules

    @property
    def rules(self) -> Mapping[str, str]:
        """The `PluralRule` as a dict of unicode plural rules.

        >>> rule = PluralRule({'one': 'n is 1'})
        >>> rule.rules
        {'one': 'n is 1'}
        """
        to_unicode = _UnicodeCompiler().compile
        rendered = {}
        for tag, tree in self.abstract:
            rendered[tag] = to_unicode(tree)
        return rendered

    @property
    def tags(self) -> frozenset[str]:
        """A set of explicitly defined tags in this rule.  The implicit default
        ``'other'`` rule is not part of this set unless there is an explicit
        rule for it.
        """
        return frozenset(tag for tag, *_ in self.abstract)

    def __getstate__(self) -> list[tuple[str, Any]]:
        # Only the parsed rules are pickled; the compiled function is rebuilt
        # lazily on the first call.
        return self.abstract

    def __setstate__(self, abstract: list[tuple[str, Any]]) -> None:
        self.abstract = abstract

    def __call__(self, n: float | decimal.Decimal) -> str:
        # Compile to a Python function on first use and cache it in the slot.
        try:
            func = self._func
        except AttributeError:
            func = self._func = to_python(self)
        return func(n)
|
|
|
|
|
def to_javascript(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """Render a list/dict of rules or a `PluralRule` object as the source of
    a self-contained JavaScript function expression:

    >>> to_javascript({'one': 'n is 1'})
    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

    Implementation detail: The function generated will probably evaluate
    expressions involved into range operations multiple times.  This has the
    advantage that external helper functions are not required and is not a
    big performance hit for these simple calculations.

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    compile_js = _JavaScriptCompiler().compile
    # One "<condition> ? '<tag>' : " link per rule; the chain is closed by
    # the fallback tag.
    chain = ''.join(
        f"{compile_js(tree)} ? {tag!r} : "
        for tag, tree in PluralRule.parse(rule).abstract
    )
    return f"(function(n) {{ return {chain}{_fallback_tag!r}; }})"
|
|
|
|
|
def to_python(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> Callable[[float | decimal.Decimal], str]:
    """Convert a list/dict of rules or a `PluralRule` object into a regular
    Python function.  This is useful in situations where you need a real
    function and don't care about the actual rule object:

    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
    >>> func(1)
    'one'
    >>> func(3)
    'few'
    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
    >>> func(11)
    'one'
    >>> func(15)
    'few'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    # Helpers that the generated source refers to by name.
    namespace = {
        'IN': in_range_list,
        'WITHIN': within_range_list,
        'MOD': cldr_modulo,
        'extract_operands': extract_operands,
    }
    compile_py = _PythonCompiler().compile
    lines = [
        'def evaluate(n):',
        ' n, i, v, w, f, t, c, e = extract_operands(n)',
    ]
    # One "if (<condition>): return <tag>" line per rule.  str() coerces the
    # tag to a plain string before it is embedded via repr().
    lines.extend(
        f" if ({compile_py(tree)}): return {str(tag)!r}"
        for tag, tree in PluralRule.parse(rule).abstract
    )
    lines.append(f" return {_fallback_tag!r}")
    code = compile('\n'.join(lines), '<rule>', 'exec')
    # The source is generated from the parsed AST (operand names and integer
    # literals only), not taken verbatim from the rule text.
    exec(code, namespace)
    return namespace['evaluate']
|
|
|
|
|
def to_gettext(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """The plural rule as gettext expression.  The gettext expression is
    technically limited to integers and returns indices rather than tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2);'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    plural_rule = PluralRule.parse(rule)

    # The fallback tag always takes part, even without an explicit rule.
    used_tags = plural_rule.tags | {_fallback_tag}
    compile_gettext = _GettextCompiler().compile
    # A tag's index is its position among the used tags, in `_plural_tags` order.
    ordered_tags = [tag for tag in _plural_tags if tag in used_tags]
    index_of = ordered_tags.index

    chain = ''.join(
        f"{compile_gettext(tree)} ? {index_of(tag)} : "
        for tag, tree in plural_rule.abstract
    )
    return f"nplurals={len(used_tags)}; plural=({chain}{index_of(_fallback_tag)});"
|
|
|
|
|
def in_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
    """Integer range list test.  This is the callback for the "in" operator
    of the UTS #35 pluralization rule language.  A number with a fractional
    part never matches:

    >>> in_range_list(1, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3), (5, 8)])
    True
    >>> in_range_list(1.2, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4), (6, 8)])
    False
    """
    is_integral = num == int(num)
    if not is_integral:
        return is_integral
    return within_range_list(num, range_list)
|
|
|
|
|
def within_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
    """Float range test.  This is the callback for the "within" operator
    of the UTS #35 pluralization rule language.  Both bounds of every
    ``(min, max)`` pair are inclusive:

    >>> within_range_list(1, [(1, 3)])
    True
    >>> within_range_list(1.0, [(1, 3)])
    True
    >>> within_range_list(1.2, [(1, 4)])
    True
    >>> within_range_list(8.8, [(1, 4), (7, 15)])
    True
    >>> within_range_list(10, [(1, 4)])
    False
    >>> within_range_list(10.5, [(1, 4), (20, 30)])
    False
    """
    for lower, upper in range_list:
        if num >= lower and num <= upper:
            return True
    return False
|
|
|
|
|
def cldr_modulo(a: float, b: float) -> float:
    """Javaish modulo.  The result takes the sign of the dividend ``a``
    instead of the sign of the divisor as Python's ``%`` does:

    >>> cldr_modulo(-3, 5)
    -3
    >>> cldr_modulo(-3, -5)
    -3
    >>> cldr_modulo(3, 5)
    3
    """
    negative_dividend = a < 0
    # Work on magnitudes, then restore the dividend's sign afterwards.
    dividend = a * -1 if negative_dividend else a
    divisor = b * -1 if b < 0 else b
    remainder = dividend % divisor
    if negative_dividend:
        return remainder * -1
    return remainder
|
|
|
|
|
class RuleError(Exception):
    """Error raised by the tokenizer and parser when a plural rule
    expression is malformed.
    """
|
|
|
|
|
_VARS = { |
|
'n', |
|
'i', |
|
'v', |
|
'w', |
|
'f', |
|
't', |
|
'c', |
|
'e', |
|
} |
|
|
|
_RULES: list[tuple[str | None, re.Pattern[str]]] = [ |
|
(None, re.compile(r'\s+', re.UNICODE)), |
|
('word', re.compile(fr'\b(and|or|is|(?:with)?in|not|mod|[{"".join(_VARS)}])\b')), |
|
('value', re.compile(r'\d+')), |
|
('symbol', re.compile(r'%|,|!=|=')), |
|
('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)), |
|
] |
|
|
|
|
|
def tokenize_rule(s: str) -> list[tuple[str, str]]:
    """Split a rule expression into ``(type, text)`` tokens.

    Everything from the first ``'@'`` onwards is discarded before scanning.
    The tokens are returned in *reverse* order, so the next token to consume
    is always the last list element.

    :param s: the rule expression
    :return: the reversed list of tokens
    :raise RuleError: if no tokenizer pattern matches at some position
    """
    text = s.partition('@')[0]
    tokens: list[tuple[str, str]] = []
    cursor = 0
    length = len(text)
    while cursor < length:
        for kind, pattern in _RULES:
            found = pattern.match(text, cursor)
            if found is None:
                continue
            cursor = found.end()
            if kind:
                tokens.append((kind, found.group()))
            break
        else:
            # No pattern matched at the current position.
            raise RuleError(f"malformed CLDR pluralization rule.  Got unexpected {text[cursor]!r}")
    tokens.reverse()
    return tokens
|
|
|
|
|
def test_next_token(
    tokens: list[tuple[str, str]],
    type_: str,
    value: str | None = None,
) -> list[tuple[str, str]] | bool:
    """Check whether the next token has the given type (and text).

    The next token is the last element of ``tokens``.  For an empty token
    list the (falsy) list itself is returned; otherwise the result is a bool.

    :param tokens: the reversed token list
    :param type_: the token type to look for
    :param value: if given, the token text must equal it as well
    """
    if not tokens:
        return tokens
    upcoming = tokens[-1]
    if upcoming[0] != type_:
        return False
    return value is None or upcoming[1] == value
|
|
|
|
|
def skip_token(tokens: list[tuple[str, str]], type_: str, value: str | None = None):
    """Consume and return the next token if it matches, else return ``None``.

    :param tokens: the reversed token list; a matching token is popped from
                   its end
    :param type_: the token type to look for
    :param value: if given, the token text must equal it as well
    """
    if not test_next_token(tokens, type_, value):
        return None
    return tokens.pop()
|
|
|
|
|
def value_node(value: int) -> tuple[Literal['value'], tuple[int]]:
    """Build the AST node for an integer literal."""
    payload = (value,)
    return 'value', payload
|
|
|
|
|
def ident_node(name: str) -> tuple[str, tuple[()]]:
    """Build the AST node for an operand name; it carries no arguments."""
    no_args = ()
    return name, no_args
|
|
|
|
|
def range_list_node(
    range_list: Iterable[Iterable[float | decimal.Decimal]],
) -> tuple[Literal['range_list'], Iterable[Iterable[float | decimal.Decimal]]]:
    """Build the AST node for a range list; the list is stored as given."""
    node = ('range_list', range_list)
    return node
|
|
|
|
|
def negate(rv: tuple[Any, ...]) -> tuple[Literal['not'], tuple[tuple[Any, ...]]]:
    """Wrap the AST node ``rv`` in a ``'not'`` node."""
    wrapped = (rv,)
    return 'not', wrapped
|
|
|
|
|
class _Parser:
    """Internal parser.  This class can translate a single rule into an abstract
    tree of tuples. It implements the following grammar::

        condition     = and_condition ('or' and_condition)*
                        ('@integer' samples)?
                        ('@decimal' samples)?
        and_condition = relation ('and' relation)*
        relation      = is_relation | in_relation | within_relation
        is_relation   = expr 'is' ('not')? value
        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
        within_relation = expr ('not')? 'within' range_list
        expr          = operand (('mod' | '%') value)?
        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w' | 'c' | 'e'
        range_list    = (range | value) (',' range_list)*
        value         = digit+
        digit         = 0|1|2|3|4|5|6|7|8|9
        range         = value'..'value
        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
        sampleRange   = decimalValue '~' decimalValue
        decimalValue  = value ('.' value)?

    - Whitespace can occur between or around any of the above tokens.
    - Rules should be mutually exclusive; for a given numeric value, only one
      rule should apply (i.e. the condition should only be true for one of
      the plural rule elements).
    - The in and within relations can take comma-separated lists, such as:
      'n in 3,5,7..15'.
    - Samples are ignored.

    The translator parses the expression on instantiation into an attribute
    called `ast`.
    """

    def __init__(self, string):
        self.tokens = tokenize_rule(string)
        if self.tokens:
            self.ast = self.condition()
            if self.tokens:
                raise RuleError(f"Expected end of rule, got {self.tokens[-1][1]!r}")
        else:
            # The tokenizer drops everything after '@', so an empty rule or
            # one consisting only of samples yields no tokens and no AST.
            self.ast = None

    def expect(self, type_, value=None, term=None):
        """Consume the next token, which must match, or raise `RuleError`."""
        token = skip_token(self.tokens, type_, value)
        if token is not None:
            return token
        if term is None:
            # Name the expectation after the token text if one was given,
            # otherwise after the token type.
            if value is None:
                wanted = type_ or value
            else:
                wanted = value
            term = repr(wanted)
        if self.tokens:
            raise RuleError(f"expected {term} but got {self.tokens[-1][1]!r}")
        raise RuleError(f"expected {term} but end of rule reached")

    def condition(self):
        node = self.and_condition()
        while skip_token(self.tokens, 'word', 'or'):
            # Left-associative chain of 'or' nodes.
            node = ('or', (node, self.and_condition()))
        return node

    def and_condition(self):
        node = self.relation()
        while skip_token(self.tokens, 'word', 'and'):
            # Left-associative chain of 'and' nodes.
            node = ('and', (node, self.relation()))
        return node

    def relation(self):
        left = self.expr()
        if skip_token(self.tokens, 'word', 'is'):
            operator = 'isnot' if skip_token(self.tokens, 'word', 'not') else 'is'
            return operator, (left, self.value())
        negated = skip_token(self.tokens, 'word', 'not')
        if skip_token(self.tokens, 'word', 'within'):
            method = 'within'
        elif skip_token(self.tokens, 'word', 'in'):
            method = 'in'
        else:
            # Neither keyword follows: this must be the '=' / '!=' syntax,
            # which cannot be combined with a leading 'not'.
            if negated:
                raise RuleError('Cannot negate operator based rules.')
            return self.newfangled_relation(left)
        node = ('relation', (method, left, self.range_list()))
        if negated:
            return negate(node)
        return node

    def newfangled_relation(self, left):
        negated = False
        if not skip_token(self.tokens, 'symbol', '='):
            if not skip_token(self.tokens, 'symbol', '!='):
                raise RuleError('Expected "=" or "!=" or legacy relation')
            negated = True
        node = ('relation', ('in', left, self.range_list()))
        if negated:
            return negate(node)
        return node

    def range_or_value(self):
        low = self.value()
        # A single value is represented as a range whose bounds coincide.
        high = self.value() if skip_token(self.tokens, 'ellipsis') else low
        return low, high

    def range_list(self):
        items = [self.range_or_value()]
        while skip_token(self.tokens, 'symbol', ','):
            items.append(self.range_or_value())
        return range_list_node(items)

    def expr(self):
        word = skip_token(self.tokens, 'word')
        if word is None or word[1] not in _VARS:
            raise RuleError('Expected identifier variable')
        operand = ident_node(word[1])
        # 'mod' and '%' are two spellings of the same operator.
        if skip_token(self.tokens, 'word', 'mod') or skip_token(self.tokens, 'symbol', '%'):
            return 'mod', (operand, self.value())
        return operand

    def value(self):
        token = self.expect('value')
        return value_node(int(token[1]))
|
|
|
|
|
def _binary_compiler(tmpl): |
|
"""Compiler factory for the `_Compiler`.""" |
|
return lambda self, left, right: tmpl % (self.compile(left), self.compile(right)) |
|
|
|
|
|
def _unary_compiler(tmpl): |
|
"""Compiler factory for the `_Compiler`.""" |
|
return lambda self, x: tmpl % self.compile(x) |
|
|
|
|
|
def compile_zero(x):
    """Compile an operand to the constant ``'0'``; the argument is ignored.

    Assigned as a compiler method for operands that a target does not
    support.
    """
    return '0'
|
|
|
|
|
class _Compiler:
    """The compilers are able to transform the expressions into multiple
    output formats.

    An AST node is an ``(op, args)`` tuple; compiling it dispatches to the
    method named ``compile_<op>``, called with the unpacked ``args``.
    """

    def compile(self, arg):
        op, args = arg
        handler = getattr(self, f"compile_{op}")
        return handler(*args)

    # Operands compile to their own name by default.
    def compile_n(self):
        return 'n'

    def compile_i(self):
        return 'i'

    def compile_v(self):
        return 'v'

    def compile_w(self):
        return 'w'

    def compile_f(self):
        return 'f'

    def compile_t(self):
        return 't'

    def compile_c(self):
        return 'c'

    def compile_e(self):
        return 'e'

    def compile_value(self, v):
        return str(v)

    # Default operators use C-like syntax; subclasses override as needed.
    compile_and = _binary_compiler('(%s && %s)')
    compile_or = _binary_compiler('(%s || %s)')
    compile_not = _unary_compiler('(!%s)')
    compile_mod = _binary_compiler('(%s %% %s)')
    compile_is = _binary_compiler('(%s == %s)')
    compile_isnot = _binary_compiler('(%s != %s)')

    def compile_relation(self, method, expr, range_list):
        # Range relations have no common rendering; subclasses provide one.
        raise NotImplementedError()
|
|
|
|
|
class _PythonCompiler(_Compiler):
    """Compiles an expression to Python."""

    compile_and = _binary_compiler('(%s and %s)')
    compile_or = _binary_compiler('(%s or %s)')
    compile_not = _unary_compiler('(not %s)')
    compile_mod = _binary_compiler('MOD(%s, %s)')

    def compile_relation(self, method, expr, range_list):
        # Rendered as a call to a helper named after the upper-cased method,
        # passing the expression and a list of (min, max) pairs.
        pairs = [
            f"({self.compile(low)}, {self.compile(high)})"
            for low, high in range_list[1]
        ]
        ranges = ",".join(pairs)
        helper = method.upper()
        return f"{helper}({self.compile(expr)}, [{ranges}])"
|
|
|
|
|
class _GettextCompiler(_Compiler):
    """Compile into a gettext plural expression.

    The only operand emitted is ``n``: ``i`` compiles to ``n`` and every
    other operand compiles to the constant ``0``.
    """

    compile_i = _Compiler.compile_n
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero
    # `extract_operands` always yields 0 for the compact exponent operands,
    # so they compile to 0 here too instead of being emitted as the bare
    # identifiers `c` / `e`.
    compile_c = compile_zero
    compile_e = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Render a range relation as a chain of comparisons joined by ``||``.

        ``method`` is not consulted: "in" and "within" are rendered alike.
        """
        compiled_expr = self.compile(expr)
        clauses = []
        for low, high in range_list[1]:
            if low == high:
                # Single value: plain equality test.
                clauses.append(f"({compiled_expr} == {self.compile(low)})")
            else:
                lower = self.compile(low)
                upper = self.compile(high)
                clauses.append(f"({compiled_expr} >= {lower} && {compiled_expr} <= {upper})")
        return f"({' || '.join(clauses)})"
|
|
|
|
|
class _JavaScriptCompiler(_GettextCompiler):
    """Compiles the expression to plain JavaScript."""

    # The fraction operands all compile to 0 for JavaScript; only the
    # integer part of `n` is derived, via parseInt.
    def compile_i(self):
        return 'parseInt(n, 10)'

    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        comparisons = _GettextCompiler.compile_relation(
            self, method, expr, range_list)
        if method != 'in':
            return comparisons
        # "in" additionally requires the expression to be integral.
        compiled_expr = self.compile(expr)
        return f"(parseInt({compiled_expr}, 10) == {compiled_expr} && {comparisons})"
|
|
|
|
|
class _UnicodeCompiler(_Compiler):
    """Returns a unicode pluralization rule again."""

    # Relations are rendered in the keyword syntax ("is", "in", "within",
    # "mod"), not the symbolic "=" / "!=" / "%" forms.
    compile_is = _binary_compiler('%s is %s')
    compile_isnot = _binary_compiler('%s is not %s')
    compile_and = _binary_compiler('%s and %s')
    compile_or = _binary_compiler('%s or %s')
    compile_mod = _binary_compiler('%s mod %s')

    def compile_not(self, relation):
        # The negated node is a relation node; re-render it with "not".
        relation_args = relation[1]
        return self.compile_relation(*relation_args, negated=True)

    def compile_relation(self, method, expr, range_list, negated=False):
        rendered = []
        for low, high in range_list[1]:
            if low == high:
                # Equal bounds are written as a single value.
                rendered.append(self.compile(low))
            else:
                rendered.append(f"{self.compile(low)}..{self.compile(high)}")
        negation = ' not' if negated else ''
        return f"{self.compile(expr)}{negation} {method} {','.join(rendered)}"
|
|