# Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. """ Localization utilities to find available language packs and packages with localization data. """ from __future__ import annotations import gettext import importlib import json import locale import os import re import sys import traceback from functools import lru_cache from typing import Any, Pattern import babel from packaging.version import parse as parse_version # See compatibility note on `group` keyword in https://docs.python.org/3/library/importlib.metadata.html#entry-points if sys.version_info < (3, 10): # pragma: no cover from importlib_metadata import entry_points else: # pragma: no cover from importlib.metadata import entry_points # Entry points JUPYTERLAB_LANGUAGEPACK_ENTRY = "jupyterlab.languagepack" JUPYTERLAB_LOCALE_ENTRY = "jupyterlab.locale" # Constants DEFAULT_LOCALE = "en" SYS_LOCALE = locale.getlocale()[0] or DEFAULT_LOCALE LOCALE_DIR = "locale" LC_MESSAGES_DIR = "LC_MESSAGES" DEFAULT_DOMAIN = "jupyterlab" L10N_SCHEMA_NAME = "@jupyterlab/translation-extension:plugin" PY37_OR_LOWER = sys.version_info[:2] <= (3, 7) # Pseudo language locale for in-context translation PSEUDO_LANGUAGE = "ach_UG" _default_schema_context = "schema" _default_settings_context = "settings" _lab_i18n_config = "jupyter.lab.internationalization" # mapping of schema translatable string selectors to translation context DEFAULT_SCHEMA_SELECTORS = { "properties/.*/title": _default_settings_context, "properties/.*/description": _default_settings_context, "definitions/.*/properties/.*/title": _default_settings_context, "definitions/.*/properties/.*/description": _default_settings_context, "title": _default_schema_context, "description": _default_schema_context, # JupyterLab-specific r"jupyter\.lab\.setting-icon-label": _default_settings_context, r"jupyter\.lab\.menus/.*/label": "menu", r"jupyter\.lab\.toolbars/.*/label": "toolbar", } @lru_cache def _get_default_schema_selectors() -> dict[Pattern, str]: return { re.compile("^/" + pattern + "$"): context for pattern, context in DEFAULT_SCHEMA_SELECTORS.items() } def _prepare_schema_patterns(schema: dict) -> dict[Pattern, str]: return { **_get_default_schema_selectors(), **{ re.compile("^/" + selector + "$"): _default_schema_context for selector in schema.get(_lab_i18n_config, {}).get("selectors", []) }, } # --- Private process helpers # ---------------------------------------------------------------------------- def _get_installed_language_pack_locales() -> tuple[dict[str, Any], str]: """ Get available installed language pack locales. Returns ------- tuple A tuple, where the first item is the result and the second item any error messages. """ data = {} messages = [] for entry_point in entry_points(group=JUPYTERLAB_LANGUAGEPACK_ENTRY): try: data[entry_point.name] = os.path.dirname(entry_point.load().__file__) except Exception: # pragma: no cover messages.append(traceback.format_exc()) message = "\n".join(messages) return data, message def _get_installed_package_locales() -> tuple[dict[str, Any], str]: """ Get available installed packages containing locale information. Returns ------- tuple A tuple, where the first item is the result and the second item any error messages. The value for the key points to the root location the package. """ data = {} messages = [] for entry_point in entry_points(group=JUPYTERLAB_LOCALE_ENTRY): try: data[entry_point.name] = os.path.dirname(entry_point.load().__file__) except Exception: messages.append(traceback.format_exc()) message = "\n".join(messages) return data, message # --- Helpers # ---------------------------------------------------------------------------- def is_valid_locale(locale_: str) -> bool: """ Check if a `locale_` value is valid. Parameters ---------- locale_: str Language locale code. Notes ----- A valid locale is in the form language (See ISO-639 standard) and an optional territory (See ISO-3166 standard). Examples of valid locales: - English: DEFAULT_LOCALE - Australian English: "en_AU" - Portuguese: "pt" - Brazilian Portuguese: "pt_BR" Examples of invalid locales: - Australian Spanish: "es_AU" - Brazilian German: "de_BR" """ # Add exception for Norwegian if locale_ in { "no_NO", }: return True valid = False try: babel.Locale.parse(locale_) valid = True except (babel.core.UnknownLocaleError, ValueError): # Expected error if the locale is unknown pass return valid def get_display_name(locale_: str, display_locale: str = DEFAULT_LOCALE) -> str: """ Return the language name to use with a `display_locale` for a given language locale. Parameters ---------- locale_: str The language name to use. display_locale: str, optional The language to display the `locale_`. Returns ------- str Localized `locale_` and capitalized language name using `display_locale` as language. """ locale_ = locale_ if is_valid_locale(locale_) else DEFAULT_LOCALE display_locale = display_locale if is_valid_locale(display_locale) else DEFAULT_LOCALE try: loc = babel.Locale.parse(locale_) display_name = loc.get_display_name(display_locale) except babel.UnknownLocaleError: display_name = display_locale if display_name: display_name = display_name[0].upper() + display_name[1:] return display_name # type:ignore[return-value] def merge_locale_data( language_pack_locale_data: dict[str, Any], package_locale_data: dict[str, Any] ) -> dict[str, Any]: """ Merge language pack data with locale data bundled in packages. Parameters ---------- language_pack_locale_data: dict The dictionary with language pack locale data. package_locale_data: dict The dictionary with package locale data. Returns ------- dict Merged locale data. """ result = language_pack_locale_data package_lp_metadata = language_pack_locale_data.get("", {}) package_lp_version = package_lp_metadata.get("version", None) package_lp_domain = package_lp_metadata.get("domain", None) package_metadata = package_locale_data.get("", {}) package_version = package_metadata.get("version", None) package_domain = package_metadata.get("domain", "None") if package_lp_version and package_version and package_domain == package_lp_domain: package_version = parse_version(package_version) package_lp_version = parse_version(package_lp_version) if package_version > package_lp_version: # If package version is more recent, then update keys of the language pack result = language_pack_locale_data.copy() result.update(package_locale_data) return result def get_installed_packages_locale(locale_: str) -> tuple[dict, str]: """ Get all jupyterlab extensions installed that contain locale data. Returns ------- tuple A tuple in the form `(locale_data_dict, message)`, where the `locale_data_dict` is an ordered list of available language packs: >>> {"package-name": locale_data, ...} Examples -------- - `entry_points={"jupyterlab.locale": "package-name = package_module"}` - `entry_points={"jupyterlab.locale": "jupyterlab-git = jupyterlab_git"}` """ found_package_locales, message = _get_installed_package_locales() packages_locale_data = {} messages = message.split("\n") if not message: for package_name, package_root_path in found_package_locales.items(): locales = {} try: locale_path = os.path.join(package_root_path, LOCALE_DIR) # Handle letter casing locales = { loc.lower(): loc for loc in os.listdir(locale_path) if os.path.isdir(os.path.join(locale_path, loc)) } except Exception: messages.append(traceback.format_exc()) if locale_.lower() in locales: locale_json_path = os.path.join( locale_path, locales[locale_.lower()], LC_MESSAGES_DIR, f"{package_name}.json", ) if os.path.isfile(locale_json_path): try: with open(locale_json_path, encoding="utf-8") as fh: packages_locale_data[package_name] = json.load(fh) except Exception: messages.append(traceback.format_exc()) return packages_locale_data, "\n".join(messages) # --- API # ---------------------------------------------------------------------------- def get_language_packs(display_locale: str = DEFAULT_LOCALE) -> tuple[dict, str]: """ Return the available language packs installed in the system. The returned information contains the languages displayed in the current locale. Parameters ---------- display_locale: str, optional Default is DEFAULT_LOCALE. Returns ------- tuple A tuple in the form `(locale_data_dict, message)`. """ found_locales, message = _get_installed_language_pack_locales() locales = {} messages = message.split("\n") if not message: invalid_locales = [] valid_locales = [] messages = [] for locale_ in found_locales: if is_valid_locale(locale_): valid_locales.append(locale_) else: invalid_locales.append(locale_) display_locale_ = display_locale if display_locale in valid_locales else DEFAULT_LOCALE locales = { DEFAULT_LOCALE: { "displayName": ( get_display_name(DEFAULT_LOCALE, display_locale_) if display_locale != PSEUDO_LANGUAGE else "Default" ), "nativeName": get_display_name(DEFAULT_LOCALE, DEFAULT_LOCALE), } } for locale_ in valid_locales: locales[locale_] = { "displayName": get_display_name(locale_, display_locale_), "nativeName": get_display_name(locale_, locale_), } if invalid_locales: if PSEUDO_LANGUAGE in invalid_locales: invalid_locales.remove(PSEUDO_LANGUAGE) locales[PSEUDO_LANGUAGE] = { "displayName": "Pseudo-language", # Trick to ensure the proper language is selected in the language menu "nativeName": ( "to translate the UI" if display_locale != PSEUDO_LANGUAGE else "Pseudo-language" ), } # Check again as the pseudo-language was maybe the only invalid locale if invalid_locales: messages.append(f"The following locales are invalid: {invalid_locales}!") return locales, "\n".join(messages) def get_language_pack(locale_: str) -> tuple: """ Get a language pack for a given `locale_` and update with any installed package locales. Returns ------- tuple A tuple in the form `(locale_data_dict, message)`. Notes ----- We call `_get_installed_language_pack_locales` via a subprocess to guarantee the results represent the most up-to-date entry point information, which seems to be defined on interpreter startup. """ found_locales, message = _get_installed_language_pack_locales() found_packages_locales, message = get_installed_packages_locale(locale_) locale_data = {} messages = message.split("\n") if ( not message and (locale_ == PSEUDO_LANGUAGE or is_valid_locale(locale_)) and locale_ in found_locales ): path = found_locales[locale_] for root, __, files in os.walk(path, topdown=False): for name in files: if name.endswith(".json"): pkg_name = name.replace(".json", "") json_path = os.path.join(root, name) try: with open(json_path, encoding="utf-8") as fh: merged_data = json.load(fh) except Exception: messages.append(traceback.format_exc()) # Load packages with locale data and merge them if pkg_name in found_packages_locales: pkg_data = found_packages_locales[pkg_name] merged_data = merge_locale_data(merged_data, pkg_data) locale_data[pkg_name] = merged_data # Check if package locales exist that do not exists in language pack for pkg_name, data in found_packages_locales.items(): if pkg_name not in locale_data: locale_data[pkg_name] = data return locale_data, "\n".join(messages) # --- Translators # ---------------------------------------------------------------------------- class TranslationBundle: """ Translation bundle providing gettext translation functionality. """ def __init__(self, domain: str, locale_: str): """Initialize the bundle.""" self._domain = domain self._locale = locale_ self._translator = gettext.NullTranslations() self.update_locale(locale_) def update_locale(self, locale_: str) -> None: """ Update the locale. Parameters ---------- locale_: str The language name to use. """ # TODO: Need to handle packages that provide their own .mo files self._locale = locale_ localedir = None if locale_ != DEFAULT_LOCALE: language_pack_module = f"jupyterlab_language_pack_{locale_}" try: mod = importlib.import_module(language_pack_module) assert mod.__file__ is not None localedir = os.path.join(os.path.dirname(mod.__file__), LOCALE_DIR) except Exception: # noqa: S110 # no-op pass self._translator = gettext.translation( self._domain, localedir=localedir, languages=(self._locale,), fallback=True ) def gettext(self, msgid: str) -> str: """ Translate a singular string. Parameters ---------- msgid: str The singular string to translate. Returns ------- str The translated string. """ return self._translator.gettext(msgid) def ngettext(self, msgid: str, msgid_plural: str, n: int) -> str: """ Translate a singular string with pluralization. Parameters ---------- msgid: str The singular string to translate. msgid_plural: str The plural string to translate. n: int The number for pluralization. Returns ------- str The translated string. """ return self._translator.ngettext(msgid, msgid_plural, n) def pgettext(self, msgctxt: str, msgid: str) -> str: """ Translate a singular string with context. Parameters ---------- msgctxt: str The message context. msgid: str The singular string to translate. Returns ------- str The translated string. """ # Python 3.7 or lower does not offer translations based on context. # On these versions `pgettext` falls back to `gettext` if PY37_OR_LOWER: translation = self._translator.gettext(msgid) else: translation = self._translator.pgettext(msgctxt, msgid) return translation def npgettext(self, msgctxt: str, msgid: str, msgid_plural: str, n: int) -> str: """ Translate a singular string with context and pluralization. Parameters ---------- msgctxt: str The message context. msgid: str The singular string to translate. msgid_plural: str The plural string to translate. n: int The number for pluralization. Returns ------- str The translated string. """ # Python 3.7 or lower does not offer translations based on context. # On these versions `npgettext` falls back to `ngettext` if PY37_OR_LOWER: translation = self._translator.ngettext(msgid, msgid_plural, n) else: translation = self._translator.npgettext(msgctxt, msgid, msgid_plural, n) return translation # Shorthands def __(self, msgid: str) -> str: """ Shorthand for gettext. Parameters ---------- msgid: str The singular string to translate. Returns ------- str The translated string. """ return self.gettext(msgid) def _n(self, msgid: str, msgid_plural: str, n: int) -> str: """ Shorthand for ngettext. Parameters ---------- msgid: str The singular string to translate. msgid_plural: str The plural string to translate. n: int The number for pluralization. Returns ------- str The translated string. """ return self.ngettext(msgid, msgid_plural, n) def _p(self, msgctxt: str, msgid: str) -> str: """ Shorthand for pgettext. Parameters ---------- msgctxt: str The message context. msgid: str The singular string to translate. Returns ------- str The translated string. """ return self.pgettext(msgctxt, msgid) def _np(self, msgctxt: str, msgid: str, msgid_plural: str, n: int) -> str: """ Shorthand for npgettext. Parameters ---------- msgctxt: str The message context. msgid: str The singular string to translate. msgid_plural: str The plural string to translate. n: int The number for pluralization. Returns ------- str The translated string. """ return self.npgettext(msgctxt, msgid, msgid_plural, n) class translator: """ Translations manager. """ _TRANSLATORS: dict[str, TranslationBundle] = {} _LOCALE = SYS_LOCALE @staticmethod def normalize_domain(domain: str) -> str: """Normalize a domain name. Parameters ---------- domain: str Domain to normalize Returns ------- str Normalized domain """ return domain.replace("-", "_") @classmethod def set_locale(cls, locale_: str) -> None: """ Set locale for the translation bundles based on the settings. Parameters ---------- locale_: str The language name to use. """ if locale_ == cls._LOCALE: # Nothing to do bail early return if is_valid_locale(locale_): cls._LOCALE = locale_ for _, bundle in cls._TRANSLATORS.items(): bundle.update_locale(locale_) @classmethod def load(cls, domain: str) -> TranslationBundle: """ Load translation domain. The domain is usually the normalized ``package_name``. Parameters ---------- domain: str The translations domain. The normalized python package name. Returns ------- Translator A translator instance bound to the domain. """ norm_domain = translator.normalize_domain(domain) if norm_domain in cls._TRANSLATORS: trans = cls._TRANSLATORS[norm_domain] else: trans = TranslationBundle(norm_domain, cls._LOCALE) cls._TRANSLATORS[norm_domain] = trans return trans @staticmethod def _translate_schema_strings( translations: Any, schema: dict, prefix: str = "", to_translate: dict[Pattern, str] | None = None, ) -> None: """Translate a schema in-place.""" if to_translate is None: to_translate = _prepare_schema_patterns(schema) for key, value in schema.items(): path = prefix + "/" + key if isinstance(value, str): matched = False for pattern, context in to_translate.items(): # noqa: B007 if pattern.fullmatch(path): matched = True break if matched: schema[key] = translations.pgettext(context, value) elif isinstance(value, dict): translator._translate_schema_strings( translations, value, prefix=path, to_translate=to_translate, ) elif isinstance(value, list): for i, element in enumerate(value): if not isinstance(element, dict): continue translator._translate_schema_strings( translations, element, prefix=path + "[" + str(i) + "]", to_translate=to_translate, ) @staticmethod def translate_schema(schema: dict) -> dict: """Translate a schema. Parameters ---------- schema: dict The schema to be translated Returns ------- Dict The translated schema """ if translator._LOCALE == DEFAULT_LOCALE: return schema translations = translator.load( schema.get(_lab_i18n_config, {}).get("domain", DEFAULT_DOMAIN) ) new_schema = schema.copy() translator._translate_schema_strings(translations, new_schema) return new_schema