File size: 9,116 Bytes
d1ceb73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 |
"""
babel.localedata
~~~~~~~~~~~~~~~~
Low-level locale data access.
:note: The `Locale` class, which uses this module under the hood, provides a
more convenient interface for accessing the locale data.
:copyright: (c) 2013-2024 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
from __future__ import annotations
import os
import pickle
import re
import sys
import threading
from collections import abc
from collections.abc import Iterator, Mapping, MutableMapping
from functools import lru_cache
from itertools import chain
from typing import Any
# In-memory cache of loaded locale data, keyed by locale identifier
# (populated by `load`, consulted by `exists` and `normalize_locale`).
_cache: dict[str, Any] = {}
# Re-entrant lock guarding `_cache`; re-entrancy matters because `load`
# calls itself for the parent locale while still holding the lock.
_cache_lock = threading.RLock()
# Directory holding the pickled ``<locale>.dat`` files, next to this module.
_dirname = os.path.join(os.path.dirname(__file__), 'locale-data')
# Case-insensitive match for the device names checked on Windows
# (con, prn, aux, nul, com0-com9, lpt0-lpt9); applied to a file-name stem.
_windows_reserved_name_re = re.compile("^(con|prn|aux|nul|com[0-9]|lpt[0-9])$", re.I)
def normalize_locale(name: str) -> str | None:
    """Return the properly-cased form of a locale identifier.

    Surrounding whitespace is stripped and the comparison is done
    case-insensitively. Candidates are the identifiers currently held in the
    in-memory cache, followed by those reported by `locale_identifiers()`.

    :param name: the locale identifier to normalize
    :return: the matching identifier as stored, or `None` if `name` is empty,
             not a string, or matches no known identifier
    """
    if not (name and isinstance(name, str)):
        return None
    wanted = name.strip().lower()
    candidates = chain(_cache, locale_identifiers())
    # First case-insensitive hit wins; fall through to None when nothing matches.
    return next(
        (candidate for candidate in candidates if wanted == candidate.lower()),
        None,
    )
def resolve_locale_filename(name: os.PathLike[str] | str) -> str:
    """Map a locale identifier to the path of its ``.dat`` file.

    Only the final path component of `name` is used; the result is that
    component with ``.dat`` appended, inside the locale data directory.

    :param name: the locale identifier (any directory part is discarded)
    :return: the path of the data file (it is not checked for existence)
    :raise ValueError: on Windows, if the name's stem is a reserved device name
    """
    # Drop any directory components so only a bare file name remains.
    name = os.path.basename(name)

    # Reserved device names are only a concern on Windows.
    if sys.platform == "win32":
        stem, _extension = os.path.splitext(name)
        if _windows_reserved_name_re.match(stem):
            raise ValueError(f"Name {name} is invalid on Windows")

    return os.path.join(_dirname, f"{name}.dat")
def exists(name: str) -> bool:
    """Tell whether locale data is available for the given locale.

    The checks run from cheapest to most expensive: the in-memory cache, then
    the data file on disk, then a case-insensitive match via
    `normalize_locale`.

    :param name: the locale identifier string
    :return: `True` if data for the locale exists, `False` otherwise
             (including when `name` is empty or not a string)
    """
    if not (name and isinstance(name, str)):
        return False
    if name in _cache:
        return True
    if os.path.exists(resolve_locale_filename(name)):
        return True
    # Last resort: the identifier may just be cased differently.
    return bool(normalize_locale(name))
@lru_cache(maxsize=None)
def locale_identifiers() -> list[str]:
    """List every locale identifier that has a data file.

    An identifier is the stem of each ``.dat`` file in the locale data
    directory; ``root`` is left out.

    The result is cached after the first call; use
    `locale_identifiers.cache_clear()` to force a re-scan.

    .. versionadded:: 0.8.1

    :return: a list of locale identifiers (strings)
    """
    identifiers = []
    for filename in os.listdir(_dirname):
        stem, extension = os.path.splitext(filename)
        if extension == '.dat' and stem != 'root':
            identifiers.append(stem)
    return identifiers
def _is_non_likely_script(name: str) -> bool:
    """Tell whether `name` is a ``lang_Script`` locale with an unlikely script.

    That is: the identifier consists of exactly a language and a script, and
    the script differs from the one in the language's likely-subtags entry.

    This implements the behavior of the ``nonlikelyScript`` value of the
    ``localRules`` attribute for parent locales added in CLDR 45.

    :param name: the locale identifier to inspect
    :return: `True` for a language/script-only identifier whose script is not
             the likely one; `False` otherwise, including when `name` cannot
             be parsed
    """
    # Imported here rather than at module level, as in the original code.
    from babel.core import get_global, parse_locale

    try:
        lang, territory, script, variant, *rest = parse_locale(name)
    except ValueError:
        return False

    # Only a bare ``lang_Script`` identifier qualifies.
    if not (lang and script and not (territory or variant or rest)):
        return False

    # NOTE(review): `.get(lang)` yields None when the language has no
    # likely-subtags entry; how `parse_locale` treats None is not visible
    # from this module -- confirm it cannot raise here.
    likely_subtag = get_global('likely_subtags').get(lang)
    _, _, likely_script, *_ = parse_locale(likely_subtag)
    return script != likely_script
# Cache for data loaded with ``merge_inherited=False``. It is kept apart from
# `_cache` so that merged and unmerged results for the same locale can never
# be handed out in place of one another. Guarded by `_cache_lock` as well.
_unmerged_cache: dict[str, Any] = {}


def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]:
    """Load the locale data for the given locale.

    The locale data is a dictionary that contains much of the data defined by
    the Common Locale Data Repository (CLDR). This data is stored as a
    collection of pickle files inside the ``babel`` package.

    >>> d = load('en_US')
    >>> d['languages']['sv']
    'Swedish'

    Note that the results are cached, and subsequent requests for the same
    locale (with the same `merge_inherited` setting) return the same
    dictionary:

    >>> d1 = load('en_US')
    >>> d2 = load('en_US')
    >>> d1 is d2
    True

    Merged and unmerged results are cached separately, so a call with
    ``merge_inherited=False`` neither returns nor replaces the merged data
    of the same locale (and vice versa).

    :param name: the locale identifier string (or "root"); only the final
                 path component is used
    :param merge_inherited: whether the inherited data should be merged into
                            the data of the requested locale
    :return: the locale data dictionary
    :raise `IOError`: if no locale data file is found for the given locale
                      identifier, or one of the locales it inherits from
    """
    name = os.path.basename(name)
    cache = _cache if merge_inherited else _unmerged_cache

    # The lock is re-entrant: the recursive `load(parent)` call below runs
    # while it is still held.
    with _cache_lock:
        data = cache.get(name)
        if not data:
            # Load inherited data
            if name == 'root' or not merge_inherited:
                data = {}
            else:
                from babel.core import get_global

                # Explicit parent overrides take precedence over the
                # identifier-truncation rule.
                parent = get_global('parent_exceptions').get(name)
                if not parent:
                    if _is_non_likely_script(name):
                        parent = 'root'
                    else:
                        parts = name.split('_')
                        parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
                # Shallow copy so merging does not alter the parent's cached
                # top-level dictionary.
                data = load(parent).copy()

            filename = resolve_locale_filename(name)
            # NOTE: pickle executes code on load; this is only acceptable
            # because the path always resolves to a file inside the package's
            # own data directory. Never point it at untrusted files.
            with open(filename, 'rb') as fileobj:
                if name != 'root' and merge_inherited:
                    merge(data, pickle.load(fileobj))
                else:
                    data = pickle.load(fileobj)
            cache[name] = data
        return data
def merge(dict1: MutableMapping[Any, Any], dict2: Mapping[Any, Any]) -> None:
    """Fold the contents of `dict2` into `dict1` in place.

    Nested dictionaries are copied before being merged into, so the
    dictionaries previously stored in `dict1` are not modified. Entries of
    `dict2` whose value is `None` are skipped.

    >>> d = {1: 'foo', 3: 'baz'}
    >>> merge(d, {1: 'Foo', 2: 'Bar'})
    >>> sorted(d.items())
    [(1, 'Foo'), (2, 'Bar'), (3, 'baz')]

    :param dict1: the dictionary to merge into
    :param dict2: the dictionary containing the data that should be merged
    """
    for key, incoming in dict2.items():
        if incoming is None:
            continue

        existing = dict1.get(key)

        # Non-dict values simply replace whatever was there.
        if not isinstance(incoming, dict):
            dict1[key] = incoming
            continue

        if existing is None:
            existing = {}

        if isinstance(existing, Alias):
            # Keep the alias and remember the overrides next to it.
            combined = (existing, incoming)
        elif isinstance(existing, tuple):
            # Already an (alias, overrides) pair: merge into a copy of the
            # overrides.
            alias, overrides = existing
            overrides = overrides.copy()
            merge(overrides, incoming)
            combined = (alias, overrides)
        else:
            combined = existing.copy()
            merge(combined, incoming)

        dict1[key] = combined
class Alias:
    """A pointer to another location inside the locale data.

    The location is given by `keys`: the sequence of keys to follow, starting
    from the top of the locale data, to reach the referenced value.
    """

    def __init__(self, keys: tuple[str, ...]) -> None:
        self.keys = tuple(keys)

    def __repr__(self) -> str:
        return f"<{type(self).__name__} {self.keys!r}>"

    def resolve(self, data: Mapping[str | int | None, Any]) -> Mapping[str | int | None, Any]:
        """Follow this alias through `data` and return what it points at.

        Resolution is recursive: if the target is itself an alias, or an
        ``(alias, others)`` pair, that alias is resolved against the same
        `data` as well.

        :param data: the locale data
        :type data: `dict`
        """
        root = data
        target = data
        for step in self.keys:
            target = target[step]

        if isinstance(target, Alias):
            return target.resolve(root)
        if isinstance(target, tuple):
            # An (alias, others) pair: only the alias part is followed here.
            nested_alias, _others = target
            return nested_alias.resolve(root)
        return target
class LocaleDataDict(abc.MutableMapping):
    """Mapping wrapper around locale data that resolves aliases on access.

    Reading a key whose stored value is an `Alias`, or an ``(alias, others)``
    pair, returns the resolved value; nested dictionaries are returned wrapped
    in a `LocaleDataDict` sharing the same `base`. Whatever was resolved or
    wrapped is written back into the underlying data.
    """

    def __init__(self, data: MutableMapping[str | int | None, Any], base: Mapping[str | int | None, Any] | None = None):
        self._data = data
        # Aliases are resolved relative to `base`; default to the data itself.
        self.base = data if base is None else base

    def __len__(self) -> int:
        return len(self._data)

    def __iter__(self) -> Iterator[str | int | None]:
        return iter(self._data)

    def __getitem__(self, key: str | int | None) -> Any:
        stored = self._data[key]
        value = stored
        if isinstance(value, Alias):
            # Plain alias: follow it.
            value = value.resolve(self.base)
        if isinstance(value, tuple):
            # (alias, others): overlay `others` on a copy of the alias target.
            alias, others = value
            value = alias.resolve(self.base).copy()
            merge(value, others)
        if isinstance(value, dict):
            # Wrap nested dicts so their aliases get resolved too.
            value = LocaleDataDict(value, base=self.base)
        if value is not stored:
            # Store the result so the work is not repeated on the next access.
            self._data[key] = value
        return value

    def __setitem__(self, key: str | int | None, value: Any) -> None:
        self._data[key] = value

    def __delitem__(self, key: str | int | None) -> None:
        del self._data[key]

    def copy(self) -> LocaleDataDict:
        """Return a new wrapper over a shallow copy of the data, same `base`."""
        return LocaleDataDict(self._data.copy(), base=self.base)
|