Spaces:

xsigus24
/

text-generation-webui

Running

App Files Files Community

text-generation-webui / installer_files /env /lib /python3.11 /site-packages /babel /plural.py

xsigus24

Upload folder using huggingface_hub

1d777c4 over 1 year ago

raw

history blame contribute delete

21.3 kB

	# -*- coding: utf-8 -*-
	"""
	babel.plural
	~~~~~~~~~~~~

	CLDR Plural support. See UTS #35.

	:copyright: (c) 2013-2021 by the Babel Team.
	:license: BSD, see LICENSE for more details.
	"""
	import re

	from babel._compat import decimal


	# Plural category tags accepted by `PluralRule`. The order of this tuple is
	# the order used by `to_gettext` to assign indices and by
	# `PluralRule.__repr__` to list the rules.
	_plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
	# Tag returned by the generated functions when no explicit rule matches.
	_fallback_tag = 'other'


	def extract_operands(source):
	    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

	    The result is a 6-tuple (n, i, v, w, f, t), where those symbols are as follows:

	    ====== ===============================================================
	    Symbol Value
	    ------ ---------------------------------------------------------------
	    n      absolute value of the source number (integer and decimals).
	    i      integer digits of n.
	    v      number of visible fraction digits in n, with trailing zeros.
	    w      number of visible fraction digits in n, without trailing zeros.
	    f      visible fractional digits in n, with trailing zeros.
	    t      visible fractional digits in n, without trailing zeros.
	    ====== ===============================================================

	    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands

	    :param source: A real number
	    :type source: int|float|decimal.Decimal
	    :return: A n-i-v-w-f-t tuple; ``n`` is an ``int`` for integral input
	             and a ``decimal.Decimal`` otherwise
	    :rtype: tuple[decimal.Decimal|int, int, int, int, int, int]
	    """
	    n = abs(source)
	    i = int(n)
	    if isinstance(n, float):
	        if i == n:
	            n = i
	        else:
	            # Cast the `float` to a number via the string representation.
	            # It is highly unlikely that the user actually wants the
	            # lossless conversion behavior (quoting the Python
	            # documentation):
	            # > If value is a float, the binary floating point value is
	            # > losslessly converted to its exact decimal equivalent.
	            # > This conversion can often require 53 or more digits of
	            # > precision.
	            # Should the user want that behavior, they can simply pass in
	            # a pre-converted `Decimal` instance of desired accuracy.
	            n = decimal.Decimal(str(n))

	    if not isinstance(n, decimal.Decimal):
	        # Plain integers have no visible fraction digits.
	        return n, i, 0, 0, 0, 0

	    dec_tuple = n.as_tuple()
	    exp = dec_tuple.exponent
	    if exp < 0:
	        digits = ''.join(str(d) for d in dec_tuple.digits[exp:])
	        # `as_tuple()` stores the coefficient without leading zeros, so
	        # Decimal('0.05') yields digits (5,) with exponent -2. Pad on the
	        # left so every visible fraction digit is counted ("05", not "5").
	        trailing = digits.rjust(-exp, '0')
	    else:
	        trailing = ''
	    no_trailing = trailing.rstrip('0')
	    v = len(trailing)
	    w = len(no_trailing)
	    f = int(trailing or 0)
	    t = int(no_trailing or 0)
	    return n, i, v, w, f, t


	class PluralRule(object):
	    """Represents a set of language pluralization rules.  The constructor
	    accepts a list of (tag, expr) tuples or a dict of `CLDR rules`_.  The
	    resulting object is callable and accepts one parameter with a positive or
	    negative number (both integer and float) for the number that indicates the
	    plural form for a string and returns the tag for the format:

	    >>> rule = PluralRule({'one': 'n is 1'})
	    >>> rule(1)
	    'one'
	    >>> rule(2)
	    'other'

	    Currently the CLDR defines these tags: zero, one, two, few, many and
	    other where other is an implicit default.  Rules should be mutually
	    exclusive; for a given numeric value, only one rule should apply (i.e.
	    the condition should only be true for one of the plural rule elements).

	    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
	    """

	    __slots__ = ('abstract', '_func')

	    def __init__(self, rules):
	        """Initialize the rule instance.

	        :param rules: a list of ``(tag, expr)`` tuples with the rules
	                      conforming to UTS #35 or a dict with the tags as keys
	                      and expressions as values.
	        :raise RuleError: if the expression is malformed
	        :raise ValueError: if a tag is unknown or defined more than once
	        """
	        pairs = rules.items() if isinstance(rules, dict) else rules
	        seen = set()
	        self.abstract = []
	        for tag, expression in sorted(pairs):
	            if tag not in _plural_tags:
	                raise ValueError('unknown tag %r' % tag)
	            if tag in seen:
	                raise ValueError('tag %r defined twice' % tag)
	            seen.add(tag)
	            tree = _Parser(expression).ast
	            # A rule consisting only of samples parses to no tree at all
	            # and is left out of the abstract representation.
	            if tree:
	                self.abstract.append((tag, tree))

	    def __repr__(self):
	        compiled = self.rules
	        listing = ', '.join([
	            '%s: %s' % (tag, compiled[tag])
	            for tag in _plural_tags
	            if tag in compiled
	        ])
	        return '<%s %r>' % (type(self).__name__, listing)

	    @classmethod
	    def parse(cls, rules):
	        """Create a `PluralRule` instance for the given rules.  If the rules
	        are a `PluralRule` object, that object is returned.

	        :param rules: the rules as list or dict, or a `PluralRule` object
	        :raise RuleError: if the expression is malformed
	        """
	        return rules if isinstance(rules, cls) else cls(rules)

	    @property
	    def rules(self):
	        """The `PluralRule` as a dict of unicode plural rules.

	        >>> rule = PluralRule({'one': 'n is 1'})
	        >>> rule.rules
	        {'one': 'n is 1'}
	        """
	        compiler = _UnicodeCompiler()
	        return {tag: compiler.compile(tree) for tag, tree in self.abstract}

	    @property
	    def tags(self):
	        """A set of explicitly defined tags in this rule.  The implicit
	        default ``'other'`` rule is not part of this set unless there is an
	        explicit rule for it.
	        """
	        return frozenset([entry[0] for entry in self.abstract])

	    def __getstate__(self):
	        # Only the parsed rules are pickled; the compiled function in
	        # `_func` is rebuilt lazily on the first call.
	        return self.abstract

	    def __setstate__(self, abstract):
	        self.abstract = abstract

	    def __call__(self, n):
	        # `_func` is an unset slot until the first call, so reading it
	        # raises AttributeError exactly once per instance.
	        try:
	            func = self._func
	        except AttributeError:
	            func = self._func = to_python(self)
	        return func(n)


	def to_javascript(rule):
	    """Convert a list/dict of rules or a `PluralRule` object into a JavaScript
	    function.  This function depends on no external library:

	    >>> to_javascript({'one': 'n is 1'})
	    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

	    Implementation detail: The function generated will probably evaluate
	    expressions involved into range operations multiple times.  This has the
	    advantage that external helper functions are not required and is not a
	    big performance hit for these simple calculations.

	    :param rule: the rules as list or dict, or a `PluralRule` object
	    :raise RuleError: if the expression is malformed
	    """
	    compiler = _JavaScriptCompiler()
	    # One "<condition> ? '<tag>' : " link per rule, chained into a single
	    # nested ternary that ends in the fallback tag.
	    branches = [
	        '%s ? %r : ' % (compiler.compile(tree), tag)
	        for tag, tree in PluralRule.parse(rule).abstract
	    ]
	    pieces = ['(function(n) { return ']
	    pieces.extend(branches)
	    pieces.append('%r; })' % _fallback_tag)
	    return ''.join(pieces)


	def to_python(rule):
	    """Convert a list/dict of rules or a `PluralRule` object into a regular
	    Python function.  This is useful in situations where you need a real
	    function and don't care about the actual rule object:

	    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
	    >>> func(1)
	    'one'
	    >>> func(3)
	    'few'
	    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
	    >>> func(11)
	    'one'
	    >>> func(15)
	    'few'

	    :param rule: the rules as list or dict, or a `PluralRule` object
	    :raise RuleError: if the expression is malformed
	    """
	    compiler = _PythonCompiler()
	    lines = [
	        'def evaluate(n):',
	        ' n, i, v, w, f, t = extract_operands(n)',
	    ]
	    # The str() call coerces the tag to the native string type.  The tags
	    # are a limited, ASCII-only set anyway, so that is fine.
	    lines.extend(
	        ' if (%s): return %r' % (compiler.compile(tree), str(tag))
	        for tag, tree in PluralRule.parse(rule).abstract
	    )
	    lines.append(' return %r' % _fallback_tag)

	    # Helper names the generated source refers to.
	    scope = {
	        'IN': in_range_list,
	        'WITHIN': within_range_list,
	        'MOD': cldr_modulo,
	        'extract_operands': extract_operands,
	    }
	    # Running the compiled module defines `evaluate` inside `scope`.
	    exec(compile('\n'.join(lines), '<rule>', 'exec'), scope)
	    return scope['evaluate']


	def to_gettext(rule):
	    """The plural rule as gettext expression.  The gettext expression is
	    technically limited to integers and returns indices rather than tags.

	    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
	    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2)'

	    :param rule: the rules as list or dict, or a `PluralRule` object
	    :return: a string of the form ``nplurals=N; plural=(...)``
	    :raise RuleError: if the expression is malformed
	    """
	    rule = PluralRule.parse(rule)

	    # The fallback tag always takes part, even without an explicit rule.
	    used_tags = rule.tags | {_fallback_tag}
	    _compile = _GettextCompiler().compile
	    # Indices follow the order of `_plural_tags`, restricted to the tags
	    # that are actually in use.
	    _get_index = [tag for tag in _plural_tags if tag in used_tags].index

	    result = ['nplurals=%d; plural=(' % len(used_tags)]
	    for tag, ast in rule.abstract:
	        result.append('%s ? %d : ' % (_compile(ast), _get_index(tag)))
	    result.append('%d)' % _get_index(_fallback_tag))
	    return ''.join(result)


	def in_range_list(num, range_list):
	    """Integer range list test.  This is the callback for the "in" operator
	    of the UTS #35 pluralization rule language:

	    >>> in_range_list(1, [(1, 3)])
	    True
	    >>> in_range_list(3, [(1, 3)])
	    True
	    >>> in_range_list(3, [(1, 3), (5, 8)])
	    True
	    >>> in_range_list(1.2, [(1, 4)])
	    False
	    >>> in_range_list(10, [(1, 4)])
	    False
	    >>> in_range_list(10, [(1, 4), (6, 8)])
	    False
	    """
	    # Values with a fractional part never satisfy "in".
	    if num != int(num):
	        return False
	    return within_range_list(num, range_list)


	def within_range_list(num, range_list):
	    """Float range test.  This is the callback for the "within" operator
	    of the UTS #35 pluralization rule language:

	    >>> within_range_list(1, [(1, 3)])
	    True
	    >>> within_range_list(1.0, [(1, 3)])
	    True
	    >>> within_range_list(1.2, [(1, 4)])
	    True
	    >>> within_range_list(8.8, [(1, 4), (7, 15)])
	    True
	    >>> within_range_list(10, [(1, 4)])
	    False
	    >>> within_range_list(10.5, [(1, 4), (20, 30)])
	    False
	    """
	    # Both bounds of every (low, high) pair are inclusive.
	    for low, high in range_list:
	        if num >= low and num <= high:
	            return True
	    return False


	def cldr_modulo(a, b):
	    """Javaish modulo.  This modulo operator returns the value with the sign
	    of the dividend rather than the divisor like Python does:

	    >>> cldr_modulo(-3, 5)
	    -3
	    >>> cldr_modulo(-3, -5)
	    -3
	    >>> cldr_modulo(3, 5)
	    3
	    """
	    negative = a < 0
	    # Work on magnitudes, then restore the dividend's sign afterwards.
	    dividend = a * -1 if negative else a
	    divisor = b * -1 if b < 0 else b
	    remainder = dividend % divisor
	    return remainder * -1 if negative else remainder


	class RuleError(Exception):
	    """Signals a malformed pluralization rule.

	    Raised by the tokenizer and the parser in this module when a rule
	    cannot be tokenized or does not follow the expected grammar.
	    """

	# Operand names that may appear in a rule expression.
	_VARS = 'nivwft'

	# Token table: (token type, pattern) pairs, tried in order at each position.
	# A token type of ``None`` means the match is consumed but not emitted.
	_RULES = [
	    (None, re.compile(r'\s+', re.UNICODE)),
	    ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[{0}])\b'
	                        .format(_VARS))),
	    ('value', re.compile(r'\d+')),
	    ('symbol', re.compile(r'%|,|!=|=')),
	    ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE))  # U+2026: ELLIPSIS
	]


	def tokenize_rule(s):
	    """Split a plural rule into ``(type, text)`` tokens.

	    Everything from the first ``@`` onwards (the samples) is discarded
	    before tokenizing.

	    :param s: the rule expression
	    :return: the tokens in *reverse* order, so that the next token to be
	             consumed is the last list element and can be taken with
	             ``list.pop()``
	    :raise RuleError: if a character cannot be matched by any token pattern
	    """
	    s = s.split('@')[0]
	    result = []
	    pos = 0
	    end = len(s)
	    while pos < end:
	        for tok, rule in _RULES:
	            match = rule.match(s, pos)
	            if match is not None:
	                pos = match.end()
	                if tok:
	                    result.append((tok, match.group()))
	                break
	        else:
	            # No pattern matched at this position.
	            raise RuleError('malformed CLDR pluralization rule.  '
	                            'Got unexpected %r' % s[pos])
	    return result[::-1]


	def test_next_token(tokens, type_, value=None):
	return tokens and tokens[-1][0] == type_ and \
	(value is None or tokens[-1][1] == value)


	def skip_token(tokens, type_, value=None):
	    """Consume and return the next token if it matches, else return ``None``.

	    The token list is only modified when the next token matches `type_`
	    (and `value`, if given).
	    """
	    if not test_next_token(tokens, type_, value):
	        return None
	    return tokens.pop()


	def value_node(value):
	    """Build the AST node for a literal value: ``('value', (value,))``."""
	    args = (value,)
	    return ('value', args)


	def ident_node(name):
	    """Build the AST node for an operand name: ``(name, ())``."""
	    no_args = ()
	    return (name, no_args)


	def range_list_node(range_list):
	    """Build the AST node wrapping a list of ranges: ``('range_list', range_list)``."""
	    return ('range_list', range_list)


	def negate(rv):
	    """Wrap an AST node in a negation node: ``('not', (rv,))``."""
	    operand = (rv,)
	    return ('not', operand)


	class _Parser(object):
	    """Internal parser.  This class can translate a single rule into an abstract
	    tree of tuples.  It implements the following grammar::

	        condition     = and_condition ('or' and_condition)*
	                        ('@integer' samples)?
	                        ('@decimal' samples)?
	        and_condition = relation ('and' relation)*
	        relation      = is_relation | in_relation | within_relation
	        is_relation   = expr 'is' ('not')? value
	        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
	        within_relation = expr ('not')? 'within' range_list
	        expr          = operand (('mod' | '%') value)?
	        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
	        range_list    = (range | value) (',' range_list)*
	        value         = digit+
	        digit         = 0|1|2|3|4|5|6|7|8|9
	        range         = value'..'value
	        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
	        sampleRange   = decimalValue '~' decimalValue
	        decimalValue  = value ('.' value)?

	    - Whitespace can occur between or around any of the above tokens.
	    - Rules should be mutually exclusive; for a given numeric value, only one
	      rule should apply (i.e. the condition should only be true for one of
	      the plural rule elements).
	    - The in and within relations can take comma-separated lists, such as:
	      'n in 3,5,7..15'.
	    - Samples are ignored.

	    The translator parses the expression on instantiation into an attribute
	    called `ast`.
	    """

	    def __init__(self, string):
	        self.tokens = tokenize_rule(string)
	        # If the pattern is only samples, it's entirely possible that no
	        # tokens whatsoever are generated; the tree is then ``None``.
	        self.ast = self.condition() if self.tokens else None
	        if self.tokens:
	            raise RuleError('Expected end of rule, got %r' %
	                            self.tokens[-1][1])

	    def expect(self, type_, value=None, term=None):
	        """Consume the next token, raising `RuleError` if it does not match."""
	        token = skip_token(self.tokens, type_, value)
	        if token is not None:
	            return token
	        if term is None:
	            described = value if value is not None else (type_ or value)
	            term = repr(described)
	        if not self.tokens:
	            raise RuleError('expected %s but end of rule reached' % term)
	        raise RuleError('expected %s but got %r' % (term, self.tokens[-1][1]))

	    def condition(self):
	        node = self.and_condition()
	        while skip_token(self.tokens, 'word', 'or'):
	            node = ('or', (node, self.and_condition()))
	        return node

	    def and_condition(self):
	        node = self.relation()
	        while skip_token(self.tokens, 'word', 'and'):
	            node = ('and', (node, self.relation()))
	        return node

	    def relation(self):
	        left = self.expr()
	        if skip_token(self.tokens, 'word', 'is'):
	            if skip_token(self.tokens, 'word', 'not'):
	                operator = 'isnot'
	            else:
	                operator = 'is'
	            return operator, (left, self.value())
	        negated = skip_token(self.tokens, 'word', 'not')
	        if skip_token(self.tokens, 'word', 'within'):
	            method = 'within'
	        elif skip_token(self.tokens, 'word', 'in'):
	            method = 'in'
	        else:
	            # Neither keyword: this must be the "=" / "!=" syntax, which
	            # cannot be combined with a preceding "not".
	            if negated:
	                raise RuleError('Cannot negate operator based rules.')
	            return self.newfangled_relation(left)
	        node = ('relation', (method, left, self.range_list()))
	        if negated:
	            return negate(node)
	        return node

	    def newfangled_relation(self, left):
	        if skip_token(self.tokens, 'symbol', '='):
	            negated = False
	        elif skip_token(self.tokens, 'symbol', '!='):
	            negated = True
	        else:
	            raise RuleError('Expected "=" or "!=" or legacy relation')
	        node = ('relation', ('in', left, self.range_list()))
	        if negated:
	            return negate(node)
	        return node

	    def range_or_value(self):
	        low = self.value()
	        # A single value is represented as the degenerate range (v, v).
	        if skip_token(self.tokens, 'ellipsis'):
	            high = self.value()
	        else:
	            high = low
	        return low, high

	    def range_list(self):
	        ranges = [self.range_or_value()]
	        while skip_token(self.tokens, 'symbol', ','):
	            ranges.append(self.range_or_value())
	        return range_list_node(ranges)

	    def expr(self):
	        word = skip_token(self.tokens, 'word')
	        if word is None or word[1] not in _VARS:
	            raise RuleError('Expected identifier variable')
	        name = word[1]
	        has_modulo = (skip_token(self.tokens, 'word', 'mod')
	                      or skip_token(self.tokens, 'symbol', '%'))
	        if has_modulo:
	            return 'mod', ((name, ()), self.value())
	        return ident_node(name)

	    def value(self):
	        token = self.expect('value')
	        return value_node(int(token[1]))


	def _binary_compiler(tmpl):
	    """Compiler factory for the `_Compiler`.

	    Returns a method that compiles both operands and interpolates the two
	    results into `tmpl`.
	    """
	    def compile_binary(self, left, right):
	        return tmpl % (self.compile(left), self.compile(right))
	    return compile_binary


	def _unary_compiler(tmpl):
	    """Compiler factory for the `_Compiler`.

	    Returns a method that compiles its single operand and interpolates the
	    result into `tmpl`.
	    """
	    def compile_unary(self, operand):
	        return tmpl % self.compile(operand)
	    return compile_unary


	def compile_zero(x):
	    """Compile any operand to the constant ``'0'``; the argument is ignored."""
	    return '0'


	class _Compiler(object):
	    """The compilers are able to transform the expressions into multiple
	    output formats.

	    An AST node is an ``(op, args)`` tuple; `compile` dispatches it to the
	    ``compile_<op>`` attribute, passing ``args`` as positional arguments.
	    The defaults defined here emit C-style operators.
	    """

	    def compile(self, arg):
	        """Compile one ``(op, args)`` node to a string."""
	        op, args = arg
	        return getattr(self, 'compile_' + op)(*args)

	    # Operands compile to their own name.
	    compile_n = lambda x: 'n'
	    compile_i = lambda x: 'i'
	    compile_v = lambda x: 'v'
	    compile_w = lambda x: 'w'
	    compile_f = lambda x: 'f'
	    compile_t = lambda x: 't'
	    compile_value = lambda x, v: str(v)
	    compile_and = _binary_compiler('(%s && %s)')
	    compile_or = _binary_compiler('(%s || %s)')
	    compile_not = _unary_compiler('(!%s)')
	    # "%%" is an escaped percent sign in the %-format template.
	    compile_mod = _binary_compiler('(%s %% %s)')
	    compile_is = _binary_compiler('(%s == %s)')
	    compile_isnot = _binary_compiler('(%s != %s)')

	    def compile_relation(self, method, expr, range_list):
	        """Compile an ``in``/``within`` relation; subclasses must override."""
	        raise NotImplementedError()


	class _PythonCompiler(_Compiler):
	    """Compiles an expression to Python."""

	    compile_and = _binary_compiler('(%s and %s)')
	    compile_or = _binary_compiler('(%s or %s)')
	    compile_not = _unary_compiler('(not %s)')
	    compile_mod = _binary_compiler('MOD(%s, %s)')

	    def compile_relation(self, method, expr, range_list):
	        """Emit a call such as ``IN(n, [(1, 3),(5, 5)])``.

	        The upper-cased method name is the name of the helper the
	        generated code calls.
	        """
	        pairs = []
	        for bounds in range_list[1]:
	            pairs.append('(%s, %s)' % tuple(self.compile(b) for b in bounds))
	        compiled_ranges = '[%s]' % ','.join(pairs)
	        return '%s(%s, %s)' % (
	            method.upper(), self.compile(expr), compiled_ranges)


	class _GettextCompiler(_Compiler):
	    """Compile into a gettext plural expression."""

	    # The integer operand compiles to plain ``n``, and the fraction
	    # operands all compile to the constant ``0``.
	    compile_i = _Compiler.compile_n
	    compile_v = compile_zero
	    compile_w = compile_zero
	    compile_f = compile_zero
	    compile_t = compile_zero

	    def compile_relation(self, method, expr, range_list):
	        """Expand a range list into ``==`` / ``>= && <=`` tests joined by ``||``.

	        `method` is not consulted here: ``in`` and ``within`` compile to
	        the same expression.
	        """
	        compiled_expr = self.compile(expr)
	        clauses = []
	        for low, high in range_list[1]:
	            if low == high:
	                # Degenerate range: a single value.
	                clauses.append('(%s == %s)' % (
	                    compiled_expr,
	                    self.compile(low)
	                ))
	            else:
	                clauses.append('(%s >= %s && %s <= %s)' % (
	                    compiled_expr,
	                    self.compile(low),
	                    compiled_expr,
	                    self.compile(high)
	                ))
	        return '(%s)' % ' || '.join(clauses)


	class _JavaScriptCompiler(_GettextCompiler):
	    """Compiles the expression to plain JavaScript."""

	    # XXX: presently javascript does not support any of the
	    # fraction support and basically only deals with integers.
	    def compile_i(self):
	        return 'parseInt(n, 10)'

	    compile_v = compile_zero
	    compile_w = compile_zero
	    compile_f = compile_zero
	    compile_t = compile_zero

	    def compile_relation(self, method, expr, range_list):
	        """Like the gettext form, plus an integer check for ``in``."""
	        code = _GettextCompiler.compile_relation(
	            self, method, expr, range_list)
	        if method != 'in':
	            return code
	        compiled_expr = self.compile(expr)
	        # "in" only matches integers, so compare the value against its
	        # parseInt() result first.
	        return '(parseInt(%s, 10) == %s && %s)' % (
	            compiled_expr, compiled_expr, code)


	class _UnicodeCompiler(_Compiler):
	    """Returns a unicode pluralization rule again."""

	    # XXX: this currently spits out the old syntax instead of the new
	    # one.  We can change that, but it will break a whole bunch of stuff
	    # for users I suppose.

	    compile_is = _binary_compiler('%s is %s')
	    compile_isnot = _binary_compiler('%s is not %s')
	    compile_and = _binary_compiler('%s and %s')
	    compile_or = _binary_compiler('%s or %s')
	    compile_mod = _binary_compiler('%s mod %s')

	    def compile_not(self, relation):
	        # `relation` is a ('relation', (method, expr, range_list)) node;
	        # its arguments are forwarded with the negation flag set.
	        return self.compile_relation(*relation[1], negated=True)

	    def compile_relation(self, method, expr, range_list, negated=False):
	        """Render ``<expr>[ not] <method> <ranges>`` in the legacy syntax."""
	        pieces = []
	        for low, high in range_list[1]:
	            if low == high:
	                pieces.append(self.compile(low))
	            else:
	                pieces.append(
	                    '%s..%s' % (self.compile(low), self.compile(high)))
	        keyword = ' not' if negated else ''
	        return '%s%s %s %s' % (
	            self.compile(expr), keyword, method, ','.join(pieces))