Spaces:
Running
Running
text-generation-webui
/
installer_files
/env
/lib
/python3.11
/site-packages
/babel
/messages
/jslexer.py
# -*- coding: utf-8 -*- | |
""" | |
babel.messages.jslexer | |
~~~~~~~~~~~~~~~~~~~~~~ | |
A simple JavaScript 1.5 lexer which is used for the JavaScript | |
extractor. | |
:copyright: (c) 2013-2021 by the Babel Team. | |
:license: BSD, see LICENSE for more details. | |
""" | |
from collections import namedtuple | |
import re | |
from babel._compat import unichr | |
# Every operator / punctuator spelling the lexer knows about.  The list is
# ordered longest-first so that a regex alternation built from it prefers
# e.g. '>>>=' over '>>' or '>'.
operators = sorted(
    [
        '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
        '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
        '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
        '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
    ],
    key=len,
    reverse=True,
)

# Single-character escapes (the character following a backslash) mapped to
# the control character they stand for.
escapes = {
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
}

# An identifier-like run of word characters, '$' and '_'.
# NOTE: ``\w`` also matches digits, so a digit-leading run matches as well.
name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE)

# Like ``name_re`` but dots are allowed after the first character; the
# pattern needs at least two characters to match.
dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE)

# A division operator: '/' optionally followed by '='.
division_re = re.compile(r'/=?')

# A regular expression literal: '/', a body in which backslash escapes any
# following character (including a newline, because of DOTALL), a closing
# '/', and optional ASCII-letter flags.
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)

# One line terminator; CRLF is tried first so it counts as a single break.
line_re = re.compile(r'(\r\n|\n|\r)')

# A backslash immediately followed by a line terminator (captured as group 1).
line_join_re = re.compile(r'\\' + line_re.pattern)

# Between one and four hexadecimal digits, as found after ``\u``.
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')

# The token record: token type, matched source text, and line number.
Token = namedtuple('Token', 'type value lineno')
# Ordered (token type, compiled pattern) pairs.  ``tokenize`` tries them in
# this order and takes the first one that matches, so order is significant.
# Entries whose type is ``None`` are consumed without producing a token.
_rules = [
    # runs of whitespace
    (None, re.compile(r'\s+', re.UNICODE)),
    # an HTML comment opener, up to the end of the line
    (None, re.compile(r'<!--.*')),
    # ``// ...`` up to the end of the line
    ('linecomment', re.compile(r'//.*')),
    # ``/* ... */``, possibly spanning several lines
    ('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
    ('dotted_name', dotted_name_re),
    ('name', name_re),
    # decimal literal with optional fraction and exponent, or a hex literal
    ('number', re.compile(r'''(
        (?:0|[1-9]\d*)
        (\.\d+)?
        ([eE][-+]?\d+)? |
        (0x[a-fA-F0-9]+)
    )''', re.VERBOSE)),
    # opening/closing tag start (``<tag`` / ``</tag``) or a self-closing ``/>``
    ('jsx_tag', re.compile(r'(?:</?[^>\s]+|/>)', re.I)),  # May be mangled in `get_rules`
    # any entry of ``operators``; they are already sorted longest-first
    ('operator', re.compile(r'(%s)' % '|'.join(re.escape(op) for op in operators))),
    # back-tick delimited string; a backslash escapes the next character
    ('template_string', re.compile(r'''`(?:[^`\\]*(?:\\.[^`\\]*)*)`''', re.UNICODE)),
    # single- or double-quoted string; a backslash escapes the next character
    ('string', re.compile(r'''(
        '(?:[^'\\]*(?:\\.[^'\\]*)*)' |
        "(?:[^"\\]*(?:\\.[^"\\]*)*)"
    )''', re.VERBOSE | re.DOTALL))
]
def get_rules(jsx, dotted, template_string):
    """
    Build the list of tokenization rules for the given syntax options.

    Internal to this module.

    :param jsx: when false, every rule whose type contains ``'jsx'`` is left out.
    :param dotted: when true, the ``'dotted_name'`` rule is kept but relabelled
                   as ``'name'``; when false, it is left out.
    :param template_string: when false, the ``'template_string'`` rule is left out.
    :return: a new list of ``(token_type, compiled_pattern)`` pairs, in the
             same relative order as ``_rules``.
    """
    selected = []
    for kind, pattern in _rules:
        # Untyped rules (kind is None) can never be JSX rules.
        is_jsx_rule = bool(kind) and 'jsx' in kind
        if is_jsx_rule and not jsx:
            continue
        if kind == 'template_string' and not template_string:
            continue
        if kind == 'dotted_name':
            if not dotted:
                continue
            # Dotted names are reported to callers as ordinary names.
            kind = 'name'
        selected.append((kind, pattern))
    return selected
def indicates_division(token):
    """Tell whether a ``/`` directly after `token` may be a division operator.

    The tokenizer uses the answer to choose between reading a division
    operator and reading a regular expression literal.

    :param token: an object with ``type`` and ``value`` attributes.
    :return: for operator tokens, ``True`` only for closing brackets and the
             increment/decrement operators; otherwise ``True`` for name,
             number, string and regexp tokens.
    """
    kind = token.type
    if kind != 'operator':
        return kind in ('name', 'number', 'string', 'regexp')
    return token.value in (')', ']', '}', '++', '--')
def unquote_string(string):
    """Unquote a string with JavaScript rules.  The string has to start with
    string delimiters (``'``, ``"`` or the back-tick/grave accent (for template strings).)

    Processing steps:

    * the first and last character (the delimiters) are stripped;
    * a backslash directly followed by a line terminator is replaced by that
      line terminator;
    * ``\\b``, ``\\f``, ``\\n``, ``\\r`` and ``\\t`` become the matching
      control character;
    * ``\\u`` / ``\\U`` followed by exactly four hex digits becomes the
      corresponding character; with fewer hex digits the ``u`` and the
      digits are kept as plain text;
    * for any other escape only the backslash is removed;
    * a lone backslash at the very end of the content has nothing to escape
      and is kept verbatim (this used to raise ``IndexError``).

    :param string: the delimited string literal, including its delimiters.
    :return: the unquoted text.
    :raises AssertionError: if `string` is empty, does not start and end with
                            the same character, or is not delimited by
                            ``'``, ``"`` or a back-tick.
    """
    assert string and string[0] == string[-1] and string[0] in '"\'`', \
        'string provided is not properly delimited'
    string = line_join_re.sub('\\1', string[1:-1])
    length = len(string)
    result = []
    add = result.append
    pos = 0

    while True:
        # scan for the next escape
        escape_pos = string.find('\\', pos)
        if escape_pos < 0:
            break
        add(string[pos:escape_pos])

        # A backslash that is the last character escapes nothing; keep it
        # as-is instead of indexing past the end of the string.
        if escape_pos + 1 >= length:
            add('\\')
            pos = length
            break

        # check which character is escaped
        next_char = string[escape_pos + 1]
        if next_char in escapes:
            add(escapes[next_char])

        # unicode escapes.  try to consume up to four characters of
        # hexadecimal characters and try to interpret them as unicode
        # character point.  If there is no such character point, put
        # all the consumed characters into the string.
        elif next_char in 'uU':
            escaped = uni_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    try:
                        add(unichr(int(escaped_value, 16)))
                    except ValueError:
                        # not a valid code point: fall through and keep
                        # the consumed characters as plain text
                        pass
                    else:
                        # skip the backslash, the 'u' and the four digits
                        pos = escape_pos + 6
                        continue
                add(next_char + escaped_value)
                pos = escaped.end()
                continue
            else:
                # no hex digits follow: keep just the 'u' / 'U'
                add(next_char)

        # bogus escape.  Just remove the backslash.
        else:
            add(next_char)
        pos = escape_pos + 2

    # whatever follows the last escape is copied unchanged
    if pos < length:
        add(string[pos:])

    return u''.join(result)
def tokenize(source, jsx=True, dotted=True, template_string=True):
    """
    Tokenize JavaScript/JSX source.  Returns a generator of tokens.

    Each yielded item is a ``Token(type, value, lineno)`` where ``lineno`` is
    the line on which the token starts (counting from 1).  Text matched by an
    untyped rule is consumed without yielding anything, and a character that
    no rule recognises is skipped.

    :param source: the source text to tokenize.
    :param jsx: Enable (limited) JSX parsing.
    :param dotted: Read dotted names as single name token.
    :param template_string: Support ES6 template strings
    """
    may_divide = False
    cursor = 0
    lineno = 1
    total = len(source)
    rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)

    while cursor < total:
        # Try the regular rules in order; the first one that matches wins.
        found = None
        for kind, pattern in rules:
            found = pattern.match(source, cursor)
            if found is not None:
                break

        if found is None:
            # No regular rule applies.  A '/' here is either a division
            # operator or the start of a regex literal; which one is decided
            # by the last yielded token (see `indicates_division`).
            if may_divide:
                kind = 'operator'
                found = division_re.match(source, cursor)
            else:
                kind = 'regexp'
                found = regex_re.match(source, cursor)

            if found is None:
                # Invalid syntax: step over one character and try again.
                cursor += 1
                continue

        text = found.group()
        if kind is not None:
            token = Token(kind, text, lineno)
            may_divide = indicates_division(token)
            yield token

        # Line breaks inside the consumed text (typed or not) advance the
        # line counter for the tokens that follow.
        lineno += len(line_re.findall(text))
        cursor = found.end()