Spaces:
Runtime error
Runtime error
""" | |
Implementation of the Truetype file format. | |
Typical applications will not need to use this module directly; look at | |
`pyglet.font` instead. | |
References: | |
* http://developer.apple.com/fonts/TTRefMan/RM06 | |
* http://www.microsoft.com/typography/otspec | |
""" | |
import os | |
import mmap | |
import struct | |
import codecs | |
class TruetypeInfo: | |
"""Information about a single Truetype face. | |
The class memory-maps the font file to read the tables, so | |
it is vital that you call the `close` method to avoid large memory | |
leaks. Once closed, you cannot call any of the ``get_*`` methods. | |
Not all tables have been implemented yet (or likely ever will). | |
Currently only the name and metric tables are read; in particular | |
there is no glyph or hinting information. | |
""" | |
_name_id_lookup = { | |
'copyright': 0, | |
'family': 1, | |
'subfamily': 2, | |
'identifier': 3, | |
'name': 4, | |
'version': 5, | |
'postscript': 6, | |
'trademark': 7, | |
'manufacturer': 8, | |
'designer': 9, | |
'description': 10, | |
'vendor-url': 11, | |
'designer-url': 12, | |
'license': 13, | |
'license-url': 14, | |
'preferred-family': 16, | |
'preferred-subfamily': 17, | |
'compatible-name': 18, | |
'sample': 19, | |
} | |
_platform_id_lookup = { | |
'unicode': 0, | |
'macintosh': 1, | |
'iso': 2, | |
'microsoft': 3, | |
'custom': 4 | |
} | |
_microsoft_encoding_lookup = { | |
1: 'utf_16_be', | |
2: 'shift_jis', | |
4: 'big5', | |
6: 'johab', | |
10: 'utf_16_be' | |
} | |
_macintosh_encoding_lookup = { | |
0: 'mac_roman' | |
} | |
def __init__(self, filename): | |
"""Read the given TrueType file. | |
:Parameters: | |
`filename` | |
The name of any Windows, OS2 or Macintosh Truetype file. | |
The object must be closed (see `close`) after use. | |
An exception will be raised if the file does not exist or cannot | |
be read. | |
""" | |
assert filename, "must provide a font file name" | |
length = os.stat(filename).st_size | |
self._fileno = os.open(filename, os.O_RDONLY) | |
if hasattr(mmap, 'MAP_SHARED'): | |
self._data = mmap.mmap(self._fileno, length, mmap.MAP_SHARED, mmap.PROT_READ) | |
else: | |
self._data = mmap.mmap(self._fileno, length, None, mmap.ACCESS_READ) | |
self._closed = False | |
offsets = _read_offset_table(self._data, 0) | |
self._tables = {} | |
for table in _read_table_directory_entry.array(self._data, offsets.size, offsets.num_tables): | |
self._tables[table.tag] = table | |
self._names = None | |
self._horizontal_metrics = None | |
self._character_advances = None | |
self._character_kernings = None | |
self._glyph_kernings = None | |
self._character_map = None | |
self._glyph_map = None | |
self._font_selection_flags = None | |
self.header = _read_head_table(self._data, self._tables['head'].offset) | |
self.horizontal_header = _read_horizontal_header(self._data, self._tables['hhea'].offset) | |
def get_font_selection_flags(self): | |
"""Return the font selection flags, as defined in OS/2 table""" | |
if not self._font_selection_flags: | |
OS2_table = _read_OS2_table(self._data, self._tables['OS/2'].offset) | |
self._font_selection_flags = OS2_table.fs_selection | |
return self._font_selection_flags | |
def is_bold(self): | |
"""Returns True iff the font describes itself as bold.""" | |
return bool(self.get_font_selection_flags() & 0x20) | |
def is_italic(self): | |
"""Returns True iff the font describes itself as italic.""" | |
return bool(self.get_font_selection_flags() & 0x1) | |
def get_names(self): | |
"""Returns a dictionary of names defined in the file. | |
The key of each item is a tuple of ``platform_id``, ``name_id``, | |
where each ID is the number as described in the Truetype format. | |
The value of each item is a tuple of | |
``encoding_id``, ``language_id``, ``value``, where ``value`` is | |
an encoded string. | |
""" | |
if self._names: | |
return self._names | |
naming_table = _read_naming_table(self._data, self._tables['name'].offset) | |
name_records = _read_name_record.array( | |
self._data, self._tables['name'].offset + naming_table.size, naming_table.count) | |
storage = naming_table.string_offset + self._tables['name'].offset | |
self._names = {} | |
for record in name_records: | |
value = self._data[record.offset + storage: record.offset + storage + record.length] | |
key = record.platform_id, record.name_id | |
value = (record.encoding_id, record.language_id, value) | |
if key not in self._names: | |
self._names[key] = [] | |
self._names[key].append(value) | |
return self._names | |
def get_name(self, name, platform=None, languages=None): | |
"""Returns the value of the given name in this font. | |
:Parameters: | |
`name` | |
Either an integer, representing the name_id desired (see | |
font format); or a string describing it, see below for | |
valid names. | |
`platform` | |
Platform for the requested name. Can be the integer ID, | |
or a string describing it. By default, the Microsoft | |
platform is searched first, then Macintosh. | |
`languages` | |
A list of language IDs to search. The first language | |
which defines the requested name will be used. By default, | |
all English dialects are searched. | |
If the name is not found, ``None`` is returned. If the name | |
is found, the value will be decoded and returned as a unicode | |
string. Currently only some common encodings are supported. | |
Valid names to request are (supply as a string):: | |
'copyright' | |
'family' | |
'subfamily' | |
'identifier' | |
'name' | |
'version' | |
'postscript' | |
'trademark' | |
'manufacturer' | |
'designer' | |
'description' | |
'vendor-url' | |
'designer-url' | |
'license' | |
'license-url' | |
'preferred-family' | |
'preferred-subfamily' | |
'compatible-name' | |
'sample' | |
Valid platforms to request are (supply as a string):: | |
'unicode' | |
'macintosh' | |
'iso' | |
'microsoft' | |
'custom' | |
""" | |
names = self.get_names() | |
if type(name) == str: | |
name = self._name_id_lookup[name] | |
if not platform: | |
for platform in ('microsoft', 'macintosh'): | |
value = self.get_name(name, platform, languages) | |
if value: | |
return value | |
if type(platform) == str: | |
platform = self._platform_id_lookup[platform] | |
if not (platform, name) in names: | |
return None | |
if platform == 3: # setup for microsoft | |
encodings = self._microsoft_encoding_lookup | |
if not languages: | |
# Default to english languages for microsoft | |
languages = (0x409, 0x809, 0xc09, 0x1009, 0x1409, 0x1809) | |
elif platform == 1: # setup for macintosh | |
encodings = self.__macintosh_encoding_lookup | |
if not languages: | |
# Default to english for macintosh | |
languages = (0,) | |
for record in names[(platform, name)]: | |
if record[1] in languages and record[0] in encodings: | |
decoder = codecs.getdecoder(encodings[record[0]]) | |
return decoder(record[2])[0] | |
return None | |
def get_horizontal_metrics(self): | |
"""Return all horizontal metric entries in table format.""" | |
if not self._horizontal_metrics: | |
ar = _read_long_hor_metric.array(self._data, | |
self._tables['hmtx'].offset, | |
self.horizontal_header.number_of_h_metrics) | |
self._horizontal_metrics = ar | |
return self._horizontal_metrics | |
def get_character_advances(self): | |
"""Return a dictionary of character->advance. | |
They key of the dictionary is a unit-length unicode string, | |
and the value is a float giving the horizontal advance in | |
em. | |
""" | |
if self._character_advances: | |
return self._character_advances | |
ga = self.get_glyph_advances() | |
gmap = self.get_glyph_map() | |
self._character_advances = {} | |
for i in range(len(ga)): | |
if i in gmap and not gmap[i] in self._character_advances: | |
self._character_advances[gmap[i]] = ga[i] | |
return self._character_advances | |
def get_glyph_advances(self): | |
"""Return a dictionary of glyph->advance. | |
They key of the dictionary is the glyph index and the value is a float | |
giving the horizontal advance in em. | |
""" | |
hm = self.get_horizontal_metrics() | |
return [float(m.advance_width) / self.header.units_per_em for m in hm] | |
def get_character_kernings(self): | |
"""Return a dictionary of (left,right)->kerning | |
The key of the dictionary is a tuple of ``(left, right)`` | |
where each element is a unit-length unicode string. The | |
value of the dictionary is the horizontal pairwise kerning | |
in em. | |
""" | |
if not self._character_kernings: | |
gmap = self.get_glyph_map() | |
kerns = self.get_glyph_kernings() | |
self._character_kernings = {} | |
for pair, value in kerns.items(): | |
lglyph, rglyph = pair | |
lchar = lglyph in gmap and gmap[lglyph] or None | |
rchar = rglyph in gmap and gmap[rglyph] or None | |
if lchar and rchar: | |
self._character_kernings[(lchar, rchar)] = value | |
return self._character_kernings | |
def get_glyph_kernings(self): | |
"""Return a dictionary of (left,right)->kerning | |
The key of the dictionary is a tuple of ``(left, right)`` | |
where each element is a glyph index. The value of the dictionary is | |
the horizontal pairwise kerning in em. | |
""" | |
if self._glyph_kernings: | |
return self._glyph_kernings | |
header = \ | |
_read_kern_header_table(self._data, self._tables['kern'].offset) | |
offset = self._tables['kern'].offset + header.size | |
kernings = {} | |
for i in range(header.n_tables): | |
header = _read_kern_subtable_header(self._data, offset) | |
if header.coverage & header.horizontal_mask \ | |
and not header.coverage & header.minimum_mask \ | |
and not header.coverage & header.perpendicular_mask: | |
if header.coverage & header.format_mask == 0: | |
self._add_kernings_format0(kernings, offset + header.size) | |
offset += header.length | |
self._glyph_kernings = kernings | |
return kernings | |
def _add_kernings_format0(self, kernings, offset): | |
header = _read_kern_subtable_format0(self._data, offset) | |
kerning_pairs = _read_kern_subtable_format0Pair.array(self._data, | |
offset + header.size, header.n_pairs) | |
for pair in kerning_pairs: | |
if (pair.left, pair.right) in kernings: | |
kernings[(pair.left, pair.right)] += pair.value \ | |
/ float(self.header.units_per_em) | |
else: | |
kernings[(pair.left, pair.right)] = pair.value \ | |
/ float(self.header.units_per_em) | |
def get_glyph_map(self): | |
"""Calculate and return a reverse character map. | |
Returns a dictionary where the key is a glyph index and the | |
value is a unit-length unicode string. | |
""" | |
if self._glyph_map: | |
return self._glyph_map | |
cmap = self.get_character_map() | |
self._glyph_map = {} | |
for ch, glyph in cmap.items(): | |
if not glyph in self._glyph_map: | |
self._glyph_map[glyph] = ch | |
return self._glyph_map | |
def get_character_map(self): | |
"""Return the character map. | |
Returns a dictionary where the key is a unit-length unicode | |
string and the value is a glyph index. Currently only | |
format 4 character maps are read. | |
""" | |
if self._character_map: | |
return self._character_map | |
cmap = _read_cmap_header(self._data, self._tables['cmap'].offset) | |
records = _read_cmap_encoding_record.array(self._data, | |
self._tables['cmap'].offset + cmap.size, cmap.num_tables) | |
self._character_map = {} | |
for record in records: | |
if record.platform_id == 3 and record.encoding_id == 1: | |
# Look at Windows Unicode charmaps only | |
offset = self._tables['cmap'].offset + record.offset | |
format_header = _read_cmap_format_header(self._data, offset) | |
if format_header.format == 4: | |
self._character_map = \ | |
self._get_character_map_format4(offset) | |
break | |
return self._character_map | |
def _get_character_map_format4(self, offset): | |
# This is absolutely, without question, the *worst* file | |
# format ever. Whoever the fuckwit is that thought this up is | |
# a fuckwit. | |
header = _read_cmap_format4Header(self._data, offset) | |
seg_count = header.seg_count_x2 // 2 | |
array_size = struct.calcsize(f'>{seg_count}') | |
end_count = self._read_array(f'>{seg_count}', | |
offset + header.size) | |
start_count = self._read_array(f'>{seg_count}', | |
offset + header.size + array_size + 2) | |
id_delta = self._read_array(f'>{seg_count}', | |
offset + header.size + array_size + 2 + array_size) | |
id_range_offset_address = \ | |
offset + header.size + array_size + 2 + array_size + array_size | |
id_range_offset = self._read_array(f'>{seg_count}', | |
id_range_offset_address) | |
character_map = {} | |
for i in range(0, seg_count): | |
if id_range_offset[i] != 0: | |
if id_range_offset[i] == 65535: | |
continue # Hack around a dodgy font (babelfish.ttf) | |
for c in range(start_count[i], end_count[i] + 1): | |
addr = id_range_offset[i] + 2 * (c - start_count[i]) + \ | |
id_range_offset_address + 2 * i | |
g = struct.unpack('>H', self._data[addr:addr + 2])[0] | |
if g != 0: | |
character_map[chr(c)] = (g + id_delta[i]) % 65536 | |
else: | |
for c in range(start_count[i], end_count[i] + 1): | |
g = (c + id_delta[i]) % 65536 | |
if g != 0: | |
character_map[chr(c)] = g | |
return character_map | |
def _read_array(self, format, offset): | |
size = struct.calcsize(format) | |
return struct.unpack(format, self._data[offset:offset + size]) | |
def close(self): | |
"""Close the font file. | |
This is a good idea, since the entire file is memory mapped in | |
until this method is called. After closing cannot rely on the | |
``get_*`` methods. | |
""" | |
self._data.close() | |
os.close(self._fileno) | |
self._closed = True | |
def __del__(self): | |
if not self._closed: | |
self.close() | |
def _read_table(*entries): | |
""" Generic table constructor used for table formats listed at | |
end of file.""" | |
fmt = '>' | |
names = [] | |
for entry in entries: | |
name, entry_type = entry.split(':') | |
names.append(name) | |
fmt += entry_type | |
class TableClass: | |
size = struct.calcsize(fmt) | |
def __init__(self, data, offset): | |
items = struct.unpack(fmt, data[offset:offset + self.size]) | |
self.pairs = list(zip(names, items)) | |
for pname, pvalue in self.pairs: | |
if isinstance(pvalue, bytes): | |
pvalue = pvalue.decode('utf-8') | |
setattr(self, pname, pvalue) | |
def __repr__(self): | |
return '{'+', '.join([f'{pname} = {pvalue}' for pname, pvalue in self.pairs])+'}' | |
def array(data, offset, count): | |
tables = [] | |
for i in range(count): | |
tables.append(TableClass(data, offset)) | |
offset += TableClass.size | |
return tables | |
return TableClass | |
# Table formats (see references) | |
_read_offset_table = _read_table('scalertype:I', | |
'num_tables:H', | |
'search_range:H', | |
'entry_selector:H', | |
'range_shift:H') | |
_read_table_directory_entry = _read_table('tag:4s', | |
'check_sum:I', | |
'offset:I', | |
'length:I') | |
_read_head_table = _read_table('version:i', | |
'font_revision:i', | |
'check_sum_adjustment:L', | |
'magic_number:L', | |
'flags:H', | |
'units_per_em:H', | |
'created:Q', | |
'modified:Q', | |
'x_min:h', | |
'y_min:h', | |
'x_max:h', | |
'y_max:h', | |
'mac_style:H', | |
'lowest_rec_p_pEM:H', | |
'font_direction_hint:h', | |
'index_to_loc_format:h', | |
'glyph_data_format:h') | |
_read_OS2_table = _read_table('version:H', | |
'x_avg_char_width:h', | |
'us_weight_class:H', | |
'us_width_class:H', | |
'fs_type:H', | |
'y_subscript_x_size:h', | |
'y_subscript_y_size:h', | |
'y_subscript_x_offset:h', | |
'y_subscript_y_offset:h', | |
'y_superscript_x_size:h', | |
'y_superscript_y_size:h', | |
'y_superscript_x_offset:h', | |
'y_superscript_y_offset:h', | |
'y_strikeout_size:h', | |
'y_strikeout_position:h', | |
's_family_class:h', | |
'panose1:B', | |
'panose2:B', | |
'panose3:B', | |
'panose4:B', | |
'panose5:B', | |
'panose6:B', | |
'panose7:B', | |
'panose8:B', | |
'panose9:B', | |
'panose10:B', | |
'ul_unicode_range1:L', | |
'ul_unicode_range2:L', | |
'ul_unicode_range3:L', | |
'ul_unicode_range4:L', | |
'ach_vend_id:I', | |
'fs_selection:H', | |
'us_first_char_index:H', | |
'us_last_char_index:H', | |
's_typo_ascender:h', | |
's_typo_descender:h', | |
's_typo_line_gap:h', | |
'us_win_ascent:H', | |
'us_win_descent:H', | |
'ul_code_page_range1:L', | |
'ul_code_page_range2:L', | |
'sx_height:h', | |
's_cap_height:h', | |
'us_default_char:H', | |
'us_break_char:H', | |
'us_max_context:H') | |
_read_kern_header_table = _read_table('version_num:H', | |
'n_tables:H') | |
_read_kern_subtable_header = _read_table('version:H', | |
'length:H', | |
'coverage:H') | |
_read_kern_subtable_header.horizontal_mask = 0x1 | |
_read_kern_subtable_header.minimum_mask = 0x2 | |
_read_kern_subtable_header.perpendicular_mask = 0x4 | |
_read_kern_subtable_header.override_mask = 0x5 | |
_read_kern_subtable_header.format_mask = 0xf0 | |
_read_kern_subtable_format0 = _read_table('n_pairs:H', | |
'search_range:H', | |
'entry_selector:H', | |
'range_shift:H') | |
_read_kern_subtable_format0Pair = _read_table('left:H', | |
'right:H', | |
'value:h') | |
_read_cmap_header = _read_table('version:H', | |
'num_tables:H') | |
_read_cmap_encoding_record = _read_table('platform_id:H', | |
'encoding_id:H', | |
'offset:L') | |
_read_cmap_format_header = _read_table('format:H', | |
'length:H') | |
_read_cmap_format4Header = _read_table('format:H', | |
'length:H', | |
'language:H', | |
'seg_count_x2:H', | |
'search_range:H', | |
'entry_selector:H', | |
'range_shift:H') | |
_read_horizontal_header = _read_table('version:i', | |
'Advance:h', | |
'Descender:h', | |
'LineGap:h', | |
'advance_width_max:H', | |
'min_left_side_bearing:h', | |
'min_right_side_bearing:h', | |
'x_max_extent:h', | |
'caret_slope_rise:h', | |
'caret_slope_run:h', | |
'caret_offset:h', | |
'reserved1:h', | |
'reserved2:h', | |
'reserved3:h', | |
'reserved4:h', | |
'metric_data_format:h', | |
'number_of_h_metrics:H') | |
_read_long_hor_metric = _read_table('advance_width:H', | |
'lsb:h') | |
_read_naming_table = _read_table('format:H', | |
'count:H', | |
'string_offset:H') | |
_read_name_record = _read_table('platform_id:H', | |
'encoding_id:H', | |
'language_id:H', | |
'name_id:H', | |
'length:H', | |
'offset:H') | |