|
""" |
|
babel.messages.mofile |
|
~~~~~~~~~~~~~~~~~~~~~ |
|
|
|
Reading and writing of files in the ``gettext`` MO (machine object) format.
|
|
|
:copyright: (c) 2013-2024 by the Babel Team. |
|
:license: BSD, see LICENSE for more details. |
|
""" |
|
from __future__ import annotations |
|
|
|
import array |
|
import struct |
|
from typing import TYPE_CHECKING |
|
|
|
from babel.messages.catalog import Catalog, Message |
|
|
|
if TYPE_CHECKING: |
|
from _typeshed import SupportsRead, SupportsWrite |
|
|
|
# Magic numbers identifying a gettext MO file.  ``read_mo`` unpacks the first
# four bytes of the data as a little-endian unsigned integer: a result equal
# to ``LE_MAGIC`` (0x950412de) means the rest of the file is little-endian,
# while ``BE_MAGIC`` (0xde120495, the same four bytes in reverse order) means
# it is big-endian.
LE_MAGIC: int = 0x950412DE
BE_MAGIC: int = 0xDE120495
|
|
|
|
|
def read_mo(fileobj: SupportsRead[bytes]) -> Catalog:
    """Read a binary MO file from the given file-like object and return a
    corresponding `Catalog` object.

    The byte order of the file is detected from the magic number stored in
    its first four bytes, so both little-endian and big-endian files are
    accepted.  Message ids, translations and message contexts are decoded
    with ``catalog.charset`` whenever that attribute is set.

    :param fileobj: the file-like object to read the MO file from
    :return: the `Catalog` that every entry of the file was stored into
    :raises OSError: if the magic number is not recognised ("Bad magic
                     number") or if a string referenced by the index tables
                     reaches the end of the data ("File is corrupt")
    :raises struct.error: if the data is too short to contain the header or
                          one of the index table entries

    :note: The implementation of this function is heavily based on the
           ``GNUTranslations._parse`` method of the ``gettext`` module in the
           standard library.
    """
    catalog = Catalog()
    headers = {}

    filename = getattr(fileobj, 'name', '')

    buf = fileobj.read()
    buflen = len(buf)
    unpack_from = struct.unpack_from

    # Parse the magic number to find out the byte order used by the file.
    magic = unpack_from('<I', buf)[0]
    if magic == LE_MAGIC:
        byte_order = '<'
    elif magic == BE_MAGIC:
        byte_order = '>'
    else:
        raise OSError(0, 'Bad magic number', filename)

    # The header continues with the format revision (read but not checked),
    # the number of messages and the offsets of the two index tables.
    _version, msgcount, origidx, transidx = unpack_from(byte_order + '4I', buf, 4)
    ii = byte_order + 'II'

    # Now put all messages from the buffer into the catalog.
    for _i in range(msgcount):
        # Every index table entry is a (length, offset) pair.
        mlen, moff = unpack_from(ii, buf, origidx)
        mend = moff + mlen
        tlen, toff = unpack_from(ii, buf, transidx)
        tend = toff + tlen
        # Strict comparison: ``write_mo`` puts a NUL byte after every string,
        # so a string of a well-formed file never reaches the very end.
        if not (mend < buflen and tend < buflen):
            raise OSError(0, 'File is corrupt', filename)
        msg = buf[moff:mend]
        tmsg = buf[toff:tend]

        # An empty msgid marks the metadata entry: its translation is a block
        # of "key: value" lines.  Keys and values are kept as bytes here.
        if mlen == 0:
            lastkey = None
            for item in tmsg.splitlines():
                item = item.strip()
                if not item:
                    continue
                if b':' in item:
                    key, value = item.split(b':', 1)
                    lastkey = key.strip().lower()
                    headers[lastkey] = value.strip()
                elif lastkey:
                    # A line without a colon continues the previous header.
                    headers[lastkey] += b'\n' + item

        # A message context is stored in front of the msgid, separated by an
        # EOT byte.  Split on the first separator only, so that a stray EOT
        # inside the msgid cannot make the unpacking fail.
        ctxt = None
        if b'\x04' in msg:
            ctxt, msg = msg.split(b'\x04', 1)

        charset = catalog.charset
        if b'\x00' in msg:
            # Plural entry: the forms are separated by NUL bytes.
            msg = msg.split(b'\x00')
            tmsg = tmsg.split(b'\x00')
            if charset:
                msg = [x.decode(charset) for x in msg]
                tmsg = [x.decode(charset) for x in tmsg]
        elif charset:
            msg = msg.decode(charset)
            tmsg = tmsg.decode(charset)
        # Decode the context like the id and the translation; ``write_mo``
        # calls ``.encode()`` on it, so it must not stay a bytes object.
        if ctxt is not None and charset:
            ctxt = ctxt.decode(charset)
        catalog[msg] = Message(msg, tmsg, context=ctxt)

        # Advance to the next entry of both index tables.
        origidx += 8
        transidx += 8

    catalog.mime_headers = headers.items()
    return catalog
|
|
|
|
|
def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = False) -> None:
    """Write a catalog to the specified file-like object using the GNU MO file
    format.

    The first message yielded by the catalog is always written.  Every other
    message is skipped when it has no translation, or when it is fuzzy and
    `use_fuzzy` is false.  The output is always little-endian with 32-bit
    fields, contains no hash table and is emitted with a single ``write()``
    call.

    >>> import sys
    >>> from babel.messages import Catalog
    >>> from gettext import GNUTranslations
    >>> from io import BytesIO

    >>> catalog = Catalog(locale='en_US')
    >>> catalog.add('foo', 'Voh')
    <Message ...>
    >>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz'))
    <Message ...>
    >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
    <Message ...>
    >>> catalog.add('Fizz', '')
    <Message ...>
    >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
    <Message ...>
    >>> buf = BytesIO()

    >>> write_mo(buf, catalog)
    >>> x = buf.seek(0)
    >>> translations = GNUTranslations(fp=buf)
    >>> if sys.version_info[0] >= 3:
    ...     translations.ugettext = translations.gettext
    ...     translations.ungettext = translations.ngettext
    >>> translations.ugettext('foo')
    u'Voh'
    >>> translations.ungettext('bar', 'baz', 1)
    u'Bahr'
    >>> translations.ungettext('bar', 'baz', 2)
    u'Batz'
    >>> translations.ugettext('fuz')
    u'fuz'
    >>> translations.ugettext('Fizz')
    u'Fizz'
    >>> translations.ugettext('Fuzz')
    u'Fuzz'
    >>> translations.ugettext('Fuzzes')
    u'Fuzzes'

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param use_fuzzy: whether translations marked as "fuzzy" should be included
                      in the output
    """
    messages = list(catalog)
    # The first entry is kept unconditionally (presumably the header entry);
    # only the remaining ones are filtered.
    messages[1:] = [m for m in messages[1:]
                    if m.string and (use_fuzzy or not m.fuzzy)]
    messages.sort()

    charset = catalog.charset

    # Encoded msgids and msgstrs, in output order.  They are collected in
    # lists and joined once, instead of growing two bytes objects per message.
    keys = []
    values = []
    for message in messages:
        if message.pluralizable:
            # Plural forms are stored NUL-separated.
            msgid = b'\x00'.join(part.encode(charset) for part in message.id)
            # A missing plural translation falls back to the singular msgid
            # for index 0 and to the plural msgid for every other index.
            msgstr = b'\x00'.join(
                (string or message.id[min(idx, 1)]).encode(charset)
                for idx, string in enumerate(message.string)
            )
        else:
            msgid = message.id.encode(charset)
            msgstr = message.string.encode(charset)
        if message.context:
            # The context is prepended to the msgid, separated by an EOT byte.
            msgid = b'\x04'.join([message.context.encode(charset), msgid])
        keys.append(msgid)
        values.append(msgstr)

    # Every string is followed by a NUL terminator that is not counted in the
    # length recorded in the index tables.
    ids = b''.join(key + b'\x00' for key in keys)
    strs = b''.join(value + b'\x00' for value in values)

    count = len(messages)
    header_size = 7 * 4
    # The header is followed by two index tables of ``count`` 8-byte entries
    # each; the msgid data starts right after them, then the msgstr data.
    keystart = header_size + 16 * count
    valuestart = keystart + len(ids)

    # Index tables as a flat list of (length, absolute offset) pairs: first
    # all msgid entries, then all msgstr entries.
    table = []
    position = keystart
    for key in keys:
        table += [len(key), position]
        position += len(key) + 1
    position = valuestart
    for value in values:
        table += [len(value), position]
        position += len(value) + 1

    # Pack with an explicit little-endian, standard-size format so that the
    # result matches LE_MAGIC and does not depend on the byte order or the C
    # ``int`` size of the machine running this code.
    header = struct.pack(
        '<7I',
        LE_MAGIC,                 # magic number
        0,                        # format revision
        count,                    # number of strings
        header_size,              # offset of the msgid index table
        header_size + count * 8,  # offset of the msgstr index table
        0, 0,                     # size and offset of the hash table (none)
    )
    fileobj.write(header + struct.pack(f'<{len(table)}I', *table) + ids + strs)
|
|