Spaces:

xsigus24
/

text-generation-webui

Running

File size: 21,314 Bytes

1d777c4

# -*- coding: utf-8 -*-
"""
    babel.numbers
    ~~~~~~~~~~~~~

    CLDR Plural support.  See UTS #35.

    :copyright: (c) 2013-2021 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""
import re

from babel._compat import decimal


_plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
_fallback_tag = 'other'


def extract_operands(source):
    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

    The result is a 6-tuple (n, i, v, w, f, t), where those symbols are as follows:

    ====== ===============================================================
    Symbol Value
    ------ ---------------------------------------------------------------
    n      absolute value of the source number (integer and decimals).
    i      integer digits of n.
    v      number of visible fraction digits in n, with trailing zeros.
    w      number of visible fraction digits in n, without trailing zeros.
    f      visible fractional digits in n, with trailing zeros.
    t      visible fractional digits in n, without trailing zeros.
    ====== ===============================================================

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands

    :param source: A real number
    :type source: int|float|decimal.Decimal
    :return: A n-i-v-w-f-t tuple
    :rtype: tuple[decimal.Decimal, int, int, int, int, int]
    """
    n = abs(source)
    i = int(n)
    if isinstance(n, float):
        if i == n:
            n = i
        else:
            # Cast the `float` to a number via the string representation.
            # This is required for Python 2.6 anyway (it will straight out fail to
            # do the conversion otherwise), and it's highly unlikely that the user
            # actually wants the lossless conversion behavior (quoting the Python
            # documentation):
            # > If value is a float, the binary floating point value is losslessly
            # > converted to its exact decimal equivalent.
            # > This conversion can often require 53 or more digits of precision.
            # Should the user want that behavior, they can simply pass in a pre-
            # converted `Decimal` instance of desired accuracy.
            n = decimal.Decimal(str(n))

    if isinstance(n, decimal.Decimal):
        dec_tuple = n.as_tuple()
        exp = dec_tuple.exponent
        fraction_digits = dec_tuple.digits[exp:] if exp < 0 else ()
        trailing = ''.join(str(d) for d in fraction_digits)
        no_trailing = trailing.rstrip('0')
        v = len(trailing)
        w = len(no_trailing)
        f = int(trailing or 0)
        t = int(no_trailing or 0)
    else:
        v = w = f = t = 0
    return n, i, v, w, f, t


class PluralRule(object):
    """Represents a set of language pluralization rules.  The constructor
    accepts a list of (tag, expr) tuples or a dict of `CLDR rules`_. The
    resulting object is callable and accepts one parameter with a positive or
    negative number (both integer and float) for the number that indicates the
    plural form for a string and returns the tag for the format:

    >>> rule = PluralRule({'one': 'n is 1'})
    >>> rule(1)
    'one'
    >>> rule(2)
    'other'

    Currently the CLDR defines these tags: zero, one, two, few, many and
    other where other is an implicit default.  Rules should be mutually
    exclusive; for a given numeric value, only one rule should apply (i.e.
    the condition should only be true for one of the plural rule elements.

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
    """

    __slots__ = ('abstract', '_func')

    def __init__(self, rules):
        """Initialize the rule instance.

        :param rules: a list of ``(tag, expr)``) tuples with the rules
                      conforming to UTS #35 or a dict with the tags as keys
                      and expressions as values.
        :raise RuleError: if the expression is malformed
        """
        if isinstance(rules, dict):
            rules = rules.items()
        found = set()
        self.abstract = []
        for key, expr in sorted(list(rules)):
            if key not in _plural_tags:
                raise ValueError('unknown tag %r' % key)
            elif key in found:
                raise ValueError('tag %r defined twice' % key)
            found.add(key)
            ast = _Parser(expr).ast
            if ast:
                self.abstract.append((key, ast))

    def __repr__(self):
        rules = self.rules
        return '<%s %r>' % (
            type(self).__name__,
            ', '.join(['%s: %s' % (tag, rules[tag]) for tag in _plural_tags
                       if tag in rules])
        )

    @classmethod
    def parse(cls, rules):
        """Create a `PluralRule` instance for the given rules.  If the rules
        are a `PluralRule` object, that object is returned.

        :param rules: the rules as list or dict, or a `PluralRule` object
        :raise RuleError: if the expression is malformed
        """
        if isinstance(rules, cls):
            return rules
        return cls(rules)

    @property
    def rules(self):
        """The `PluralRule` as a dict of unicode plural rules.

        >>> rule = PluralRule({'one': 'n is 1'})
        >>> rule.rules
        {'one': 'n is 1'}
        """
        _compile = _UnicodeCompiler().compile
        return dict([(tag, _compile(ast)) for tag, ast in self.abstract])

    tags = property(lambda x: frozenset([i[0] for i in x.abstract]), doc="""
        A set of explicitly defined tags in this rule.  The implicit default
        ``'other'`` rules is not part of this set unless there is an explicit
        rule for it.""")

    def __getstate__(self):
        return self.abstract

    def __setstate__(self, abstract):
        self.abstract = abstract

    def __call__(self, n):
        if not hasattr(self, '_func'):
            self._func = to_python(self)
        return self._func(n)


def to_javascript(rule):
    """Convert a list/dict of rules or a `PluralRule` object into a JavaScript
    function.  This function depends on no external library:

    >>> to_javascript({'one': 'n is 1'})
    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

    Implementation detail: The function generated will probably evaluate
    expressions involved into range operations multiple times.  This has the
    advantage that external helper functions are not required and is not a
    big performance hit for these simple calculations.

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    to_js = _JavaScriptCompiler().compile
    result = ['(function(n) { return ']
    for tag, ast in PluralRule.parse(rule).abstract:
        result.append('%s ? %r : ' % (to_js(ast), tag))
    result.append('%r; })' % _fallback_tag)
    return ''.join(result)


def to_python(rule):
    """Convert a list/dict of rules or a `PluralRule` object into a regular
    Python function.  This is useful in situations where you need a real
    function and don't are about the actual rule object:

    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
    >>> func(1)
    'one'
    >>> func(3)
    'few'
    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
    >>> func(11)
    'one'
    >>> func(15)
    'few'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    namespace = {
        'IN': in_range_list,
        'WITHIN': within_range_list,
        'MOD': cldr_modulo,
        'extract_operands': extract_operands,
    }
    to_python_func = _PythonCompiler().compile
    result = [
        'def evaluate(n):',
        ' n, i, v, w, f, t = extract_operands(n)',
    ]
    for tag, ast in PluralRule.parse(rule).abstract:
        # the str() call is to coerce the tag to the native string.  It's
        # a limited ascii restricted set of tags anyways so that is fine.
        result.append(' if (%s): return %r' % (to_python_func(ast), str(tag)))
    result.append(' return %r' % _fallback_tag)
    code = compile('\n'.join(result), '<rule>', 'exec')
    eval(code, namespace)
    return namespace['evaluate']


def to_gettext(rule):
    """The plural rule as gettext expression.  The gettext expression is
    technically limited to integers and returns indices rather than tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2)'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    rule = PluralRule.parse(rule)

    used_tags = rule.tags | {_fallback_tag}
    _compile = _GettextCompiler().compile
    _get_index = [tag for tag in _plural_tags if tag in used_tags].index

    result = ['nplurals=%d; plural=(' % len(used_tags)]
    for tag, ast in rule.abstract:
        result.append('%s ? %d : ' % (_compile(ast), _get_index(tag)))
    result.append('%d)' % _get_index(_fallback_tag))
    return ''.join(result)


def in_range_list(num, range_list):
    """Integer range list test.  This is the callback for the "in" operator
    of the UTS #35 pluralization rule language:

    >>> in_range_list(1, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3), (5, 8)])
    True
    >>> in_range_list(1.2, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4), (6, 8)])
    False
    """
    return num == int(num) and within_range_list(num, range_list)


def within_range_list(num, range_list):
    """Float range test.  This is the callback for the "within" operator
    of the UTS #35 pluralization rule language:

    >>> within_range_list(1, [(1, 3)])
    True
    >>> within_range_list(1.0, [(1, 3)])
    True
    >>> within_range_list(1.2, [(1, 4)])
    True
    >>> within_range_list(8.8, [(1, 4), (7, 15)])
    True
    >>> within_range_list(10, [(1, 4)])
    False
    >>> within_range_list(10.5, [(1, 4), (20, 30)])
    False
    """
    return any(num >= min_ and num <= max_ for min_, max_ in range_list)


def cldr_modulo(a, b):
    """Javaish modulo.  This modulo operator returns the value with the sign
    of the dividend rather than the divisor like Python does:

    >>> cldr_modulo(-3, 5)
    -3
    >>> cldr_modulo(-3, -5)
    -3
    >>> cldr_modulo(3, 5)
    3
    """
    reverse = 0
    if a < 0:
        a *= -1
        reverse = 1
    if b < 0:
        b *= -1
    rv = a % b
    if reverse:
        rv *= -1
    return rv


class RuleError(Exception):
    """Raised if a rule is malformed."""

_VARS = 'nivwft'

_RULES = [
    (None, re.compile(r'\s+', re.UNICODE)),
    ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[{0}])\b'
                        .format(_VARS))),
    ('value', re.compile(r'\d+')),
    ('symbol', re.compile(r'%|,|!=|=')),
    ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE))  # U+2026: ELLIPSIS
]


def tokenize_rule(s):
    s = s.split('@')[0]
    result = []
    pos = 0
    end = len(s)
    while pos < end:
        for tok, rule in _RULES:
            match = rule.match(s, pos)
            if match is not None:
                pos = match.end()
                if tok:
                    result.append((tok, match.group()))
                break
        else:
            raise RuleError('malformed CLDR pluralization rule.  '
                            'Got unexpected %r' % s[pos])
    return result[::-1]


def test_next_token(tokens, type_, value=None):
    return tokens and tokens[-1][0] == type_ and \
        (value is None or tokens[-1][1] == value)


def skip_token(tokens, type_, value=None):
    if test_next_token(tokens, type_, value):
        return tokens.pop()


def value_node(value):
    return 'value', (value, )


def ident_node(name):
    return name, ()


def range_list_node(range_list):
    return 'range_list', range_list


def negate(rv):
    return 'not', (rv,)


class _Parser(object):
    """Internal parser.  This class can translate a single rule into an abstract
    tree of tuples. It implements the following grammar::

        condition     = and_condition ('or' and_condition)*
                        ('@integer' samples)?
                        ('@decimal' samples)?
        and_condition = relation ('and' relation)*
        relation      = is_relation | in_relation | within_relation
        is_relation   = expr 'is' ('not')? value
        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
        within_relation = expr ('not')? 'within' range_list
        expr          = operand (('mod' | '%') value)?
        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
        range_list    = (range | value) (',' range_list)*
        value         = digit+
        digit         = 0|1|2|3|4|5|6|7|8|9
        range         = value'..'value
        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
        sampleRange   = decimalValue '~' decimalValue
        decimalValue  = value ('.' value)?

    - Whitespace can occur between or around any of the above tokens.
    - Rules should be mutually exclusive; for a given numeric value, only one
      rule should apply (i.e. the condition should only be true for one of
      the plural rule elements).
    - The in and within relations can take comma-separated lists, such as:
      'n in 3,5,7..15'.
    - Samples are ignored.

    The translator parses the expression on instanciation into an attribute
    called `ast`.
    """

    def __init__(self, string):
        self.tokens = tokenize_rule(string)
        if not self.tokens:
            # If the pattern is only samples, it's entirely possible
            # no stream of tokens whatsoever is generated.
            self.ast = None
            return
        self.ast = self.condition()
        if self.tokens:
            raise RuleError('Expected end of rule, got %r' %
                            self.tokens[-1][1])

    def expect(self, type_, value=None, term=None):
        token = skip_token(self.tokens, type_, value)
        if token is not None:
            return token
        if term is None:
            term = repr(value is None and type_ or value)
        if not self.tokens:
            raise RuleError('expected %s but end of rule reached' % term)
        raise RuleError('expected %s but got %r' % (term, self.tokens[-1][1]))

    def condition(self):
        op = self.and_condition()
        while skip_token(self.tokens, 'word', 'or'):
            op = 'or', (op, self.and_condition())
        return op

    def and_condition(self):
        op = self.relation()
        while skip_token(self.tokens, 'word', 'and'):
            op = 'and', (op, self.relation())
        return op

    def relation(self):
        left = self.expr()
        if skip_token(self.tokens, 'word', 'is'):
            return skip_token(self.tokens, 'word', 'not') and 'isnot' or 'is', \
                (left, self.value())
        negated = skip_token(self.tokens, 'word', 'not')
        method = 'in'
        if skip_token(self.tokens, 'word', 'within'):
            method = 'within'
        else:
            if not skip_token(self.tokens, 'word', 'in'):
                if negated:
                    raise RuleError('Cannot negate operator based rules.')
                return self.newfangled_relation(left)
        rv = 'relation', (method, left, self.range_list())
        return negate(rv) if negated else rv

    def newfangled_relation(self, left):
        if skip_token(self.tokens, 'symbol', '='):
            negated = False
        elif skip_token(self.tokens, 'symbol', '!='):
            negated = True
        else:
            raise RuleError('Expected "=" or "!=" or legacy relation')
        rv = 'relation', ('in', left, self.range_list())
        return negate(rv) if negated else rv

    def range_or_value(self):
        left = self.value()
        if skip_token(self.tokens, 'ellipsis'):
            return left, self.value()
        else:
            return left, left

    def range_list(self):
        range_list = [self.range_or_value()]
        while skip_token(self.tokens, 'symbol', ','):
            range_list.append(self.range_or_value())
        return range_list_node(range_list)

    def expr(self):
        word = skip_token(self.tokens, 'word')
        if word is None or word[1] not in _VARS:
            raise RuleError('Expected identifier variable')
        name = word[1]
        if skip_token(self.tokens, 'word', 'mod'):
            return 'mod', ((name, ()), self.value())
        elif skip_token(self.tokens, 'symbol', '%'):
            return 'mod', ((name, ()), self.value())
        return ident_node(name)

    def value(self):
        return value_node(int(self.expect('value')[1]))


def _binary_compiler(tmpl):
    """Compiler factory for the `_Compiler`."""
    return lambda self, l, r: tmpl % (self.compile(l), self.compile(r))


def _unary_compiler(tmpl):
    """Compiler factory for the `_Compiler`."""
    return lambda self, x: tmpl % self.compile(x)


compile_zero = lambda x: '0'


class _Compiler(object):
    """The compilers are able to transform the expressions into multiple
    output formats.
    """

    def compile(self, arg):
        op, args = arg
        return getattr(self, 'compile_' + op)(*args)

    compile_n = lambda x: 'n'
    compile_i = lambda x: 'i'
    compile_v = lambda x: 'v'
    compile_w = lambda x: 'w'
    compile_f = lambda x: 'f'
    compile_t = lambda x: 't'
    compile_value = lambda x, v: str(v)
    compile_and = _binary_compiler('(%s && %s)')
    compile_or = _binary_compiler('(%s || %s)')
    compile_not = _unary_compiler('(!%s)')
    compile_mod = _binary_compiler('(%s %% %s)')
    compile_is = _binary_compiler('(%s == %s)')
    compile_isnot = _binary_compiler('(%s != %s)')

    def compile_relation(self, method, expr, range_list):
        raise NotImplementedError()


class _PythonCompiler(_Compiler):
    """Compiles an expression to Python."""

    compile_and = _binary_compiler('(%s and %s)')
    compile_or = _binary_compiler('(%s or %s)')
    compile_not = _unary_compiler('(not %s)')
    compile_mod = _binary_compiler('MOD(%s, %s)')

    def compile_relation(self, method, expr, range_list):
        compile_range_list = '[%s]' % ','.join(
            ['(%s, %s)' % tuple(map(self.compile, range_))
             for range_ in range_list[1]])
        return '%s(%s, %s)' % (method.upper(), self.compile(expr),
                               compile_range_list)


class _GettextCompiler(_Compiler):
    """Compile into a gettext plural expression."""

    compile_i = _Compiler.compile_n
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        rv = []
        expr = self.compile(expr)
        for item in range_list[1]:
            if item[0] == item[1]:
                rv.append('(%s == %s)' % (
                    expr,
                    self.compile(item[0])
                ))
            else:
                min, max = map(self.compile, item)
                rv.append('(%s >= %s && %s <= %s)' % (
                    expr,
                    min,
                    expr,
                    max
                ))
        return '(%s)' % ' || '.join(rv)


class _JavaScriptCompiler(_GettextCompiler):
    """Compiles the expression to plain of JavaScript."""

    # XXX: presently javascript does not support any of the
    # fraction support and basically only deals with integers.
    compile_i = lambda x: 'parseInt(n, 10)'
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        code = _GettextCompiler.compile_relation(
            self, method, expr, range_list)
        if method == 'in':
            expr = self.compile(expr)
            code = '(parseInt(%s, 10) == %s && %s)' % (expr, expr, code)
        return code


class _UnicodeCompiler(_Compiler):
    """Returns a unicode pluralization rule again."""

    # XXX: this currently spits out the old syntax instead of the new
    # one.  We can change that, but it will break a whole bunch of stuff
    # for users I suppose.

    compile_is = _binary_compiler('%s is %s')
    compile_isnot = _binary_compiler('%s is not %s')
    compile_and = _binary_compiler('%s and %s')
    compile_or = _binary_compiler('%s or %s')
    compile_mod = _binary_compiler('%s mod %s')

    def compile_not(self, relation):
        return self.compile_relation(negated=True, *relation[1])

    def compile_relation(self, method, expr, range_list, negated=False):
        ranges = []
        for item in range_list[1]:
            if item[0] == item[1]:
                ranges.append(self.compile(item[0]))
            else:
                ranges.append('%s..%s' % tuple(map(self.compile, item)))
        return '%s%s %s %s' % (
            self.compile(expr), negated and ' not' or '',
            method, ','.join(ranges)
        )