|
""" |
|
babel.messages.extract |
|
~~~~~~~~~~~~~~~~~~~~~~ |
|
|
|
Basic infrastructure for extracting localizable messages from source files. |
|
|
|
This module defines an extensible system for collecting localizable message |
|
strings from a variety of sources. A native extractor for Python source
files is built in; extractors for other sources can be added using very
simple plugins.
|
|
|
The main entry points into the extraction functionality are the functions |
|
`extract_from_dir` and `extract_from_file`. |
|
|
|
:copyright: (c) 2013-2024 by the Babel Team. |
|
:license: BSD, see LICENSE for more details. |
|
""" |
|
from __future__ import annotations |
|
|
|
import ast |
|
import io |
|
import os |
|
import sys |
|
import tokenize |
|
from collections.abc import ( |
|
Callable, |
|
Collection, |
|
Generator, |
|
Iterable, |
|
Mapping, |
|
MutableSequence, |
|
) |
|
from functools import lru_cache |
|
from os.path import relpath |
|
from textwrap import dedent |
|
from tokenize import COMMENT, NAME, OP, STRING, generate_tokens |
|
from typing import TYPE_CHECKING, Any |
|
|
|
from babel.messages._compat import find_entrypoints |
|
from babel.util import parse_encoding, parse_future_flags, pathmatch |
|
|
|
if TYPE_CHECKING:
    # Everything below exists for static type checkers only; the names are
    # referenced from annotations (lazily evaluated thanks to
    # ``from __future__ import annotations``).
    from typing import IO, Final, Protocol

    from _typeshed import SupportsItems, SupportsRead, SupportsReadline
    from typing_extensions import TypeAlias, TypedDict

    # Options mapping accepted by `extract_python`.
    class _PyOptions(TypedDict, total=False):
        encoding: str

    # Options mapping accepted by `extract_javascript`.
    class _JSOptions(TypedDict, total=False):
        encoding: str
        jsx: bool
        template_string: bool
        parse_template_string: bool

    # Structural type for the binary file objects handed to extractors:
    # readable (read/readline returning bytes) and seekable.
    class _FileObj(SupportsRead[bytes], SupportsReadline[bytes], Protocol):
        def seek(self, __offset: int, __whence: int = ...) -> int: ...
        def tell(self) -> int: ...

    # A keyword spec: ``None`` or a tuple whose entries are either a 1-based
    # argument index or a tuple such as ``(index, 'c')`` (the form used for
    # the context argument in `DEFAULT_KEYWORDS`).
    _SimpleKeyword: TypeAlias = tuple[int | tuple[int, int] | tuple[int, str], ...] | None
    # Either a plain spec, or a dict of specs keyed by argument count
    # (``None`` as key is looked up regardless of the argument count).
    _Keyword: TypeAlias = dict[int | None, _SimpleKeyword] | _SimpleKeyword

    # 5-tuple of (filename, lineno, messages, comments, context)
    _FileExtractionResult: TypeAlias = tuple[str, int, str | tuple[str, ...], list[str], str | None]

    # 4-tuple of (lineno, messages, comments, context)
    _ExtractionResult: TypeAlias = tuple[int, str | tuple[str, ...], list[str], str | None]

    # Signature of an extractor callable.
    # Arguments: fileobj, keywords, comment_tags, options.
    # Returns an iterable of extraction results.
    _CallableExtractionMethod: TypeAlias = Callable[
        [_FileObj | IO[bytes], Mapping[str, _Keyword], Collection[str], Mapping[str, Any]],
        Iterable[_ExtractionResult],
    ]

    # An extraction method is either such a callable or a string naming one.
    _ExtractionMethod: TypeAlias = _CallableExtractionMethod | str
|
|
|
# Entry point group that `_find_extractor` searches for extractor plugins.
GROUP_NAME: Final[str] = 'babel.extractors'

# Default translation-function keywords and their argument specs.
# ``None`` means "the first argument is the message"; integers are 1-based
# positions of message arguments; a ``(position, 'c')`` tuple marks the
# argument that holds the message context.
DEFAULT_KEYWORDS: dict[str, _Keyword] = {
    '_': None,
    'gettext': None,
    'ngettext': (1, 2),
    'ugettext': None,
    'ungettext': (1, 2),
    'dgettext': (2,),
    'dngettext': (2, 3),
    'N_': None,
    'pgettext': ((1, 'c'), 2),
    'npgettext': ((1, 'c'), 2, 3),
}

# Default ``(pattern, method)`` mapping used by `extract_from_dir`.
DEFAULT_MAPPING: list[tuple[str, str]] = [('**.py', 'python')]

# f-string token types; ``None`` when the running interpreter's `tokenize`
# module does not define them (they were introduced by PEP 701 in
# Python 3.12).
FSTRING_START = getattr(tokenize, "FSTRING_START", None)
FSTRING_MIDDLE = getattr(tokenize, "FSTRING_MIDDLE", None)
FSTRING_END = getattr(tokenize, "FSTRING_END", None)
|
|
|
|
|
def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]): |
|
"""Helper function for `extract` that strips comment tags from strings |
|
in a list of comment lines. This functions operates in-place. |
|
""" |
|
def _strip(line: str): |
|
for tag in tags: |
|
if line.startswith(tag): |
|
return line[len(tag):].strip() |
|
return line |
|
comments[:] = map(_strip, comments) |
|
|
|
|
|
def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
    """Return whether `dirpath` should be recursed into.

    A directory is rejected when its last path component starts with a dot
    or an underscore.
    """
    leaf = os.path.basename(dirpath)
    return not leaf.startswith(('.', '_'))
|
|
|
|
|
def extract_from_dir(
    dirname: str | os.PathLike[str] | None = None,
    method_map: Iterable[tuple[str, str]] = DEFAULT_MAPPING,
    options_map: SupportsItems[str, dict[str, Any]] | None = None,
    keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS,
    comment_tags: Collection[str] = (),
    callback: Callable[[str, str, dict[str, Any]], object] | None = None,
    strip_comment_tags: bool = False,
    directory_filter: Callable[[str], bool] | None = None,
) -> Generator[_FileExtractionResult, None, None]:
    """Walk a directory tree and extract messages from every matching file.

    Yields tuples of the form ``(filename, lineno, message, comments,
    context)``.

    The extraction method for each file is chosen through `method_map`, a
    sequence of ``(pattern, method)`` pairs in which the pattern is an
    extended glob. This is the default mapping:

    >>> method_map = [
    ...     ('**.py', 'python')
    ... ]

    It says that any file ending in ".py", at any depth below the directory,
    is handled by the "python" extraction method. Files matching none of the
    patterns are skipped. The pattern syntax is described in the
    documentation of the `pathmatch` function.

    The following mapping additionally applies the "genshi" method to every
    file below a "templates" subdirectory:

    >>> method_map = [
    ...     ('**/templates/**.*', 'genshi'),
    ...     ('**.py', 'python')
    ... ]

    `options_map` supplements the method mapping: its keys are extended glob
    patterns and its values are dictionaries mapping option names to option
    values (both strings).

    Its patterns need not be the same as those of the method mapping. For
    instance, all files in the ``templates`` folders of an application may be
    Genshi templates while their options differ by extension:

    >>> options_map = {
    ...     '**/templates/**.txt': {
    ...         'template_class': 'genshi.template:TextTemplate',
    ...         'encoding': 'latin-1'
    ...     },
    ...     '**/templates/**.html': {
    ...         'include_attrs': ''
    ...     }
    ... }

    :param dirname: the path to the directory to extract messages from. If
                    not given the current working directory is used.
    :param method_map: a list of ``(pattern, method)`` tuples that maps
                       extended glob patterns to extraction method names
    :param options_map: a dictionary of additional options (optional)
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of tags of translator comments to search for
                         and include in the results
    :param callback: a function that is called for every file that messages
                     are extracted from, just before the extraction itself
                     is performed; the function is passed the filename, the
                     name of the extraction method and the options
                     dictionary as positional arguments, in that order
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :param directory_filter: a callback to determine whether a directory should
                             be recursed into. Receives the full directory path;
                             should return True if the directory is valid.
    :see: `pathmatch`
    """
    base_dir = os.path.abspath(os.getcwd() if dirname is None else dirname)
    file_options = {} if options_map is None else options_map
    accept_dir = default_directory_filter if directory_filter is None else directory_filter

    for root, dirnames, filenames in os.walk(base_dir):
        # Assigning through the slice prunes (and orders) the directories
        # that os.walk will descend into next.
        dirnames[:] = sorted(
            subdir for subdir in dirnames
            if accept_dir(os.path.join(root, subdir))
        )
        filenames.sort()
        for filename in filenames:
            # Patterns are written with forward slashes on every platform.
            filepath = os.path.join(root, filename).replace(os.sep, '/')
            yield from check_and_call_extract_file(
                filepath,
                method_map,
                file_options,
                callback,
                keywords,
                comment_tags,
                strip_comment_tags,
                dirpath=base_dir,
            )
|
|
|
|
|
def check_and_call_extract_file(
    filepath: str | os.PathLike[str],
    method_map: Iterable[tuple[str, str]],
    options_map: SupportsItems[str, dict[str, Any]],
    callback: Callable[[str, str, dict[str, Any]], object] | None,
    keywords: Mapping[str, _Keyword],
    comment_tags: Collection[str],
    strip_comment_tags: bool,
    dirpath: str | os.PathLike[str] | None = None,
) -> Generator[_FileExtractionResult, None, None]:
    """Extract messages from `filepath` if it matches an extraction method.

    The method and option patterns are matched against the path of the file
    relative to `dirpath`, not against the absolute `filepath`. Only the
    first matching entry of `method_map` is used; when nothing matches,
    nothing is yielded.

    Yields 5-tuples (filename, lineno, messages, comments, context), where
    filename is the relative path.

    :param filepath: An absolute path to a file that exists.
    :param method_map: a list of ``(pattern, method)`` tuples that maps
                       extended glob patterns to extraction method names
    :param options_map: a dictionary of additional options (optional)
    :param callback: a function that is called for every file that messages
                     are extracted from, just before the extraction itself
                     is performed; the function is passed the filename, the
                     name of the extraction method and the options
                     dictionary as positional arguments, in that order
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of tags of translator comments to search for
                         and include in the results
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :param dirpath: the path to the directory to extract messages from.
    :return: iterable of 5-tuples (filename, lineno, messages, comments, context)
    :rtype: Iterable[tuple[str, int, str|tuple[str], list[str], str|None]
    """
    rel_name = relpath(filepath, dirpath)

    # Locate the first mapping entry whose pattern matches this file.
    for pattern, method in method_map:
        if pathmatch(pattern, rel_name):
            break
    else:
        return

    # Every options pattern is checked; the last matching one wins.
    file_options = {}
    for option_pattern, option_values in options_map.items():
        if pathmatch(option_pattern, rel_name):
            file_options = option_values

    if callback:
        callback(rel_name, method, file_options)

    extracted = extract_from_file(
        method, filepath,
        keywords=keywords,
        comment_tags=comment_tags,
        options=file_options,
        strip_comment_tags=strip_comment_tags,
    )
    for message_tuple in extracted:
        yield (rel_name, *message_tuple)
|
|
|
|
|
def extract_from_file(
    method: _ExtractionMethod,
    filename: str | os.PathLike[str],
    keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS,
    comment_tags: Collection[str] = (),
    options: Mapping[str, Any] | None = None,
    strip_comment_tags: bool = False,
) -> list[_ExtractionResult]:
    """Open a file and extract its messages.

    The file is opened in binary mode and passed to `extract`. The special
    method name ``'ignore'`` short-circuits: the file is not opened at all
    and an empty list is returned.

    :param filename: the path to the file to extract messages from
    :param method: a string specifying the extraction method (e.g. "python")
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :param options: a dictionary of additional options (optional)
    :returns: list of tuples of the form ``(lineno, message, comments, context)``
    :rtype: list[tuple[int, str|tuple[str], list[str], str|None]
    """
    if method == 'ignore':
        return []

    with open(filename, 'rb') as fileobj:
        results = extract(
            method, fileobj, keywords, comment_tags, options, strip_comment_tags,
        )
        # The generator must be drained while the file is still open.
        return [*results]
|
|
|
|
|
def _match_messages_against_spec(lineno: int, messages: list[str|None], comments: list[str], |
|
fileobj: _FileObj, spec: tuple[int|tuple[int, str], ...]): |
|
translatable = [] |
|
context = None |
|
|
|
|
|
last_index = len(messages) |
|
for index in spec: |
|
if isinstance(index, tuple): |
|
context = messages[index[0] - 1] |
|
continue |
|
if last_index < index: |
|
|
|
return |
|
message = messages[index - 1] |
|
if message is None: |
|
return |
|
translatable.append(message) |
|
|
|
|
|
if isinstance(spec[0], tuple): |
|
|
|
first_msg_index = spec[1] - 1 |
|
else: |
|
first_msg_index = spec[0] - 1 |
|
|
|
if not messages[first_msg_index]: |
|
filename = (getattr(fileobj, "name", None) or "(unknown)") |
|
sys.stderr.write( |
|
f"{filename}:{lineno}: warning: Empty msgid. It is reserved by GNU gettext: gettext(\"\") " |
|
f"returns the header entry with meta information, not the empty string.\n", |
|
) |
|
return |
|
|
|
translatable = tuple(translatable) |
|
if len(translatable) == 1: |
|
translatable = translatable[0] |
|
|
|
return lineno, translatable, comments, context |
|
|
|
|
|
@lru_cache(maxsize=None)
def _find_extractor(name: str):
    """Load the extractor registered under `name` in the `GROUP_NAME` group.

    Returns whatever the first entry point with that name loads, or ``None``
    when no entry point has that name. Results are cached per name.
    """
    matching_loaders = (
        load for ep_name, load in find_entrypoints(GROUP_NAME) if ep_name == name
    )
    for load in matching_loaders:
        return load()
    return None
|
|
|
|
|
def extract(
    method: _ExtractionMethod,
    fileobj: _FileObj,
    keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS,
    comment_tags: Collection[str] = (),
    options: Mapping[str, Any] | None = None,
    strip_comment_tags: bool = False,
) -> Generator[_ExtractionResult, None, None]:
    """Run an extraction method over a file-like object.

    Yields tuples of the form ``(lineno, message, comments, context)``.

    The real work is delegated to an extractor selected by ``method``; this
    function then filters the extractor's raw results through the keyword
    specifications.

    >>> source = b'''# foo module
    ... def run(argv):
    ...    print(_('Hello, world!'))
    ... '''

    >>> from io import BytesIO
    >>> for message in extract('python', BytesIO(source)):
    ...     print(message)
    (3, 'Hello, world!', [], None)

    :param method: an extraction method (a callable), or
                   a string specifying the extraction method (e.g. "python");
                   if this is a simple name, the extraction function will be
                   looked up by entry point; if it is an explicit reference
                   to a function (of the form ``package.module:funcname`` or
                   ``package.module.funcname``), the corresponding function
                   will be imported and used
    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a dictionary mapping keywords (i.e. names of functions
                     that should be recognized as translation functions) to
                     tuples that specify which of their arguments contain
                     localizable strings
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :param strip_comment_tags: a flag that if set to `True` causes all comment
                               tags to be removed from the collected comments.
    :raise ValueError: if the extraction method is not registered
    :returns: iterable of tuples of the form ``(lineno, message, comments, context)``
    :rtype: Iterable[tuple[int, str|tuple[str], list[str], str|None]
    """
    if callable(method):
        extractor = method
    elif ':' in method or '.' in method:
        # Explicit reference: "package.module:funcname" or
        # "package.module.funcname".
        if ':' in method:
            module_name, attrname = method.split(':', 1)
        else:
            module_name, _, attrname = method.rpartition('.')
        extractor = getattr(__import__(module_name, {}, {}, [attrname]), attrname)
    else:
        # Simple name: entry points first, then the built-in extractors.
        extractor = _find_extractor(method)
        if extractor is None:
            extractor = _BUILTIN_EXTRACTORS.get(method)

    if extractor is None:
        raise ValueError(f"Unknown extraction method {method!r}")

    raw_results = extractor(fileobj, keywords.keys(), comment_tags,
                            options=options or {})

    for lineno, funcname, messages, comments in raw_results:
        if not isinstance(messages, (list, tuple)):
            messages = [messages]
        if not messages:
            continue

        keyword_spec = (keywords[funcname] or None) if funcname else None
        if isinstance(keyword_spec, dict):
            specs_by_arity = keyword_spec
        else:
            specs_by_arity = {None: keyword_spec}

        if strip_comment_tags:
            _strip_comment_tags(comments, comment_tags)

        # Try the spec that applies to any argument count (key None), then
        # the one registered for this exact argument count.
        for arity in (None, len(messages)):
            try:
                spec = specs_by_arity[arity]
            except KeyError:
                continue
            if spec is None:
                spec = (1,)
            matched = _match_messages_against_spec(lineno, messages, comments, fileobj, spec)
            if matched is not None:
                yield matched
|
|
|
|
|
def extract_nothing(
    fileobj: _FileObj,
    keywords: Mapping[str, _Keyword],
    comment_tags: Collection[str],
    options: Mapping[str, Any],
) -> list[_ExtractionResult]:
    """No-op extractor: ignores all of its arguments and returns a new empty
    list.
    """
    nothing: list[_ExtractionResult] = []
    return nothing
|
|
|
|
|
def extract_python(
    fileobj: IO[bytes],
    keywords: Mapping[str, _Keyword],
    comment_tags: Collection[str],
    options: _PyOptions,
) -> Generator[_ExtractionResult, None, None]:
    """Extract messages from Python source code.

    The source is tokenized and scanned for calls to any of the names in
    `keywords`. For every such call a tuple ``(lineno, funcname, messages,
    comments)`` is yielded, where ``messages`` is a single value when the
    call had one argument and a tuple otherwise; arguments that are not
    string literals are reported as ``None``.

    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional); the
                    ``encoding`` entry is used when `parse_encoding` finds
                    no encoding, with ``'UTF-8'`` as the final fallback
    :rtype: ``iterator``
    """
    funcname = lineno = message_lineno = None
    # -1: not inside a keyword call; 0: directly inside the keyword call's
    # parentheses; > 0: inside nested parentheses within that call.
    call_stack = -1
    # String pieces of the argument currently being read; joined when the
    # argument ends (at ',' or ')').
    buf = []
    messages = []
    # (lineno, text) pairs of the translator comments collected so far.
    translator_comments = []
    in_def = in_translator_comments = False
    comment_tag = None

    encoding = parse_encoding(fileobj) or options.get('encoding', 'UTF-8')
    future_flags = parse_future_flags(fileobj, encoding)
    next_line = lambda: fileobj.readline().decode(encoding)

    tokens = generate_tokens(next_line)

    # Text of the f-string currently being assembled from FSTRING_* tokens,
    # or None if no f-string is in progress.
    current_fstring_start = None

    for tok, value, (lineno, _), _, _ in tokens:
        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
            in_def = True
        elif tok == OP and value == '(':
            if in_def:
                # The parenthesis belongs to a declaration, not a call, so it
                # must not start message collection even if the declared
                # name is a keyword.
                in_def = False
                continue
            if funcname:
                message_lineno = lineno
                call_stack += 1
        elif in_def and tok == OP and value == ':':
            # The declaration ended without any parentheses.
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Drop the leading '#' and surrounding whitespace.
            value = value[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # Directly below the previous translator comment line: treat
                # it as a continuation of that comment.
                translator_comments.append((lineno, value))
                continue
            # Otherwise the comment only counts if it starts with one of the
            # comment tags.
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.append((lineno, value))
                    break
        elif funcname and call_stack == 0:
            # A keyword name appearing as an argument ends the current call
            # early and starts tracking the inner keyword instead.
            nested = (tok == NAME and value in keywords)
            if (tok == OP and value == ')') or nested:
                # Flush the last argument.
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                else:
                    messages.append(None)

                messages = tuple(messages) if len(messages) > 1 else messages[0]
                # Discard the comments unless the last one ends on the line
                # directly before the call (or later).
                if translator_comments and \
                        translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                yield (message_lineno, funcname, messages,
                       [comment[1] for comment in translator_comments])

                # Reset all per-call state.
                funcname = lineno = message_lineno = None
                call_stack = -1
                messages = []
                translator_comments = []
                in_translator_comments = False
                if nested:
                    funcname = value
            elif tok == STRING:
                val = _parse_python_string(value, encoding, future_flags)
                if val is not None:
                    buf.append(val)

            # f-strings tokenized as FSTRING_START/MIDDLE/END: rebuild the
            # literal text and parse it once the END token arrives.
            elif tok == FSTRING_START:
                current_fstring_start = value
            elif tok == FSTRING_MIDDLE:
                if current_fstring_start is not None:
                    current_fstring_start += value
            elif tok == FSTRING_END:
                if current_fstring_start is not None:
                    fstring = current_fstring_start + value
                    val = _parse_python_string(fstring, encoding, future_flags)
                    if val is not None:
                        buf.append(val)

            elif tok == OP and value == ',':
                # End of one argument.
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                else:
                    messages.append(None)
                if translator_comments:
                    # Bump the line number of the last comment by one so that
                    # it still passes the "immediately precedes" check above
                    # (presumably to tolerate a line break after the comma).
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append((old_lineno + 1, old_comment))
        elif call_stack > 0 and tok == OP and value == ')':
            call_stack -= 1
        elif funcname and call_stack == -1:
            # The keyword name was not followed by '(' - not a call.
            funcname = None
        elif tok == NAME and value in keywords:
            funcname = value

        if (current_fstring_start is not None
            and tok not in {FSTRING_START, FSTRING_MIDDLE}
        ):
            # Any other token ends the f-string being assembled: either it
            # was FSTRING_END (already handled above), or the f-string
            # contains something other than literal text and is abandoned.
            current_fstring_start = None
|
|
|
|
|
def _parse_python_string(value: str, encoding: str, future_flags: int) -> str | None: |
|
|
|
|
|
code = compile( |
|
f'# coding={str(encoding)}\n{value}', |
|
'<string>', |
|
'eval', |
|
ast.PyCF_ONLY_AST | future_flags, |
|
) |
|
if isinstance(code, ast.Expression): |
|
body = code.body |
|
if isinstance(body, ast.Constant): |
|
return body.value |
|
if isinstance(body, ast.JoinedStr): |
|
if all(isinstance(node, ast.Constant) for node in body.values): |
|
return ''.join(node.value for node in body.values) |
|
|
|
return None |
|
|
|
|
|
def extract_javascript(
    fileobj: _FileObj,
    keywords: Mapping[str, _Keyword],
    comment_tags: Collection[str],
    options: _JSOptions,
    lineno: int = 1,
) -> Generator[_ExtractionResult, None, None]:
    """Extract messages from JavaScript source code.

    The whole file is read, decoded and tokenized, and the token stream is
    scanned for calls to any of the names in `keywords`. For every such call
    with at least one argument a tuple ``(lineno, funcname, messages,
    comments)`` is yielded, where ``messages`` is a single value when the
    call had one argument and a tuple otherwise; arguments that are not
    string literals are reported as ``None``.

    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
                    Supported options are:
                    * `encoding` -- encoding used to decode the file
                                    (default ``'utf-8'``)
                    * `jsx` -- set to false to disable JSX/E4X support.
                    * `template_string` -- if `True`, supports gettext(`key`)
                    * `parse_template_string` -- if `True` will parse the
                                                 contents of javascript
                                                 template strings.
    :param lineno: line number offset (for parsing embedded fragments)
    """
    from babel.messages.jslexer import Token, tokenize, unquote_string
    funcname = message_lineno = None
    messages = []
    # The argument currently being read (string pieces joined by '+').
    last_argument = None
    # (lineno, text) pairs of the translator comments collected so far.
    translator_comments = []
    # True right after a '+' operator: the next string extends last_argument.
    concatenate_next = False
    encoding = options.get('encoding', 'utf-8')
    last_token = None
    # -1: not inside a keyword call; 0: directly inside the keyword call's
    # parentheses; > 0: inside nested parentheses within that call.
    call_stack = -1
    # Ask the lexer for dotted names only if some keyword contains a dot.
    dotted = any('.' in kw for kw in keywords)
    for token in tokenize(
        fileobj.read().decode(encoding),
        jsx=options.get("jsx", True),
        template_string=options.get("template_string", True),
        dotted=dotted,
        lineno=lineno,
    ):
        # A keyword directly followed by a template string (keyword`foo`) is
        # handled like the call keyword("foo"): record the message and
        # replace the token with a synthetic ')' so the branch below that
        # closes a call emits it.
        if (
            funcname and
            (last_token and last_token.type == 'name') and
            token.type == 'template_string'
        ):
            message_lineno = token.lineno
            messages = [unquote_string(token.value)]
            call_stack = 0
            token = Token('operator', ')', token.lineno)

        if options.get('parse_template_string') and not funcname and token.type == 'template_string':
            yield from parse_template_string(token.value, keywords, comment_tags, options, token.lineno)

        elif token.type == 'operator' and token.value == '(':
            if funcname:
                message_lineno = token.lineno
                call_stack += 1

        elif call_stack == -1 and token.type == 'linecomment':
            # Drop the leading '//' and surrounding whitespace.
            value = token.value[2:].strip()
            if translator_comments and \
               translator_comments[-1][0] == token.lineno - 1:
                # Directly below the previous translator comment line: treat
                # it as a continuation of that comment.
                translator_comments.append((token.lineno, value))
                continue

            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    translator_comments.append((token.lineno, value.strip()))
                    break

        elif token.type == 'multilinecomment':
            # A multi-line comment replaces whatever was collected before it.
            translator_comments = []
            # Drop the '/*' and '*/' delimiters.
            value = token.value[2:-2].strip()
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    lines = value.splitlines()
                    if lines:
                        lines[0] = lines[0].strip()
                        # Remove the common indentation of the remaining lines.
                        lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
                        for offset, line in enumerate(lines):
                            translator_comments.append((token.lineno + offset,
                                                        line))
                    break

        elif funcname and call_stack == 0:
            if token.type == 'operator' and token.value == ')':
                # End of the call: flush the last argument and emit.
                if last_argument is not None:
                    messages.append(last_argument)
                if len(messages) > 1:
                    messages = tuple(messages)
                elif messages:
                    messages = messages[0]
                else:
                    messages = None

                # Discard the comments unless the last one ends on the line
                # directly before the call (or later).
                if translator_comments and \
                   translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                if messages is not None:
                    yield (message_lineno, funcname, messages,
                           [comment[1] for comment in translator_comments])

                # Reset all per-call state.
                funcname = message_lineno = last_argument = None
                concatenate_next = False
                translator_comments = []
                messages = []
                call_stack = -1

            elif token.type in ('string', 'template_string'):
                new_value = unquote_string(token.value)
                if concatenate_next:
                    last_argument = (last_argument or '') + new_value
                    concatenate_next = False
                else:
                    last_argument = new_value

            elif token.type == 'operator':
                if token.value == ',':
                    # End of one argument.
                    if last_argument is not None:
                        messages.append(last_argument)
                        last_argument = None
                    else:
                        messages.append(None)
                    concatenate_next = False
                elif token.value == '+':
                    concatenate_next = True

        elif call_stack > 0 and token.type == 'operator' \
                and token.value == ')':
            call_stack -= 1

        elif funcname and call_stack == -1:
            # The keyword name was not followed by '(' - not a call.
            funcname = None

        # A keyword name starts tracking, unless it directly follows the
        # name token ``function`` (i.e. it is being declared, not called).
        elif call_stack == -1 and token.type == 'name' and \
            token.value in keywords and \
            (last_token is None or last_token.type != 'name' or
             last_token.value != 'function'):
            funcname = token.value

        last_token = token
|
|
|
|
|
def parse_template_string(
    template_string: str,
    keywords: Mapping[str, _Keyword],
    comment_tags: Collection[str],
    options: _JSOptions,
    lineno: int = 1,
) -> Generator[_ExtractionResult, None, None]:
    """Parse JavaScript template string.

    Scans the template for ``${...}`` expressions and runs
    `extract_javascript` on the source of each top-level expression,
    yielding whatever it yields.

    :param template_string: the template string to be parsed, including its
                            enclosing backticks
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :param lineno: starting line number (optional)
    """
    from babel.messages.jslexer import line_re
    prev_character = None
    # Nesting depth of "${" ... "}" (plain braces inside an expression do not
    # increase it).
    level = 0
    # False, or the quote character of the string literal we are inside of.
    inside_str = False
    expression_contents = ''
    # Must match the encoding `extract_javascript` will decode with.
    encoding = options.get('encoding', 'utf-8')
    for character in template_string[1:-1]:
        if not inside_str and character in ('"', "'", '`'):
            inside_str = character
        # A quote preceded by a backslash is escaped and does not end the
        # string. (The comparison must be against a single backslash; a
        # two-character string can never equal one character.)
        elif inside_str == character and prev_character != '\\':
            inside_str = False
        if level:
            expression_contents += character
        if not inside_str:
            if character == '{' and prev_character == '$':
                level += 1
            elif level and character == '}':
                level -= 1
                if level == 0 and expression_contents:
                    # Drop the closing brace that was just appended.
                    expression_contents = expression_contents[0:-1]
                    fake_file_obj = io.BytesIO(expression_contents.encode(encoding))
                    yield from extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno)
                    # Advance by the number of line breaks in the expression.
                    lineno += len(line_re.findall(expression_contents))
                    expression_contents = ''
        prev_character = character
|
|
|
|
|
# Extractors shipped with this module. `extract` falls back to this table
# when no entry point is registered under the requested method name.
_BUILTIN_EXTRACTORS = {
    'ignore': extract_nothing,
    'python': extract_python,
    'javascript': extract_javascript,
}
|
|