Spaces:
Running
Running
from typing import Any, Iterable, List, Optional, Tuple | |
from pdf2zh import settings | |
from pdf2zh.pdfparser import PDFSyntaxError | |
from pdf2zh.pdftypes import dict_value, int_value, list_value | |
from pdf2zh.utils import choplist | |
class NumberTree:
    """One node of a PDF number tree, with access to the entries below it.

    See Section 3.8.6 of the PDF Reference.

    The node dictionary is resolved once in the constructor; its "Nums",
    "Kids" and "Limits" entries (when present) are kept as lists on the
    instance.
    """

    def __init__(self, obj: Any):
        """Resolve *obj* to a dictionary and pick out the node's entries.

        Each of ``nums``, ``kids`` and ``limits`` stays ``None`` when the
        corresponding key is absent from the node dictionary.
        """
        self._obj = dict_value(obj)
        node = self._obj

        self.nums: Optional[Iterable[Any]] = (
            list_value(node["Nums"]) if "Nums" in node else None
        )
        self.kids: Optional[Iterable[Any]] = (
            list_value(node["Kids"]) if "Kids" in node else None
        )
        # Stored for completeness; nothing in this class reads it back.
        self.limits: Optional[Iterable[Any]] = (
            list_value(node["Limits"]) if "Limits" in node else None
        )

    def _parse(self) -> List[Tuple[int, Any]]:
        """Collect every ``(key, value)`` pair at or below this node.

        Pairs are returned in the order they are encountered: this node's
        own "Nums" entries first, then each kid's pairs in turn. No sorting
        or order validation happens here.
        """
        pairs: List[Tuple[int, Any]] = []

        if self.nums:
            # "Nums" is a flat [key, value, key, value, ...] array; consume
            # it two elements at a time.
            pairs.extend(
                (int_value(key), val) for key, val in choplist(2, self.nums)
            )

        if self.kids:
            # Descend into every child node and splice in its pairs.
            for kid in self.kids:
                pairs.extend(NumberTree(kid)._parse())

        return pairs

    values: List[Tuple[int, Any]]  # workaround decorators unsupported by mypy

    # NOTE(review): the annotation above and the type-ignore comment below
    # suggest a decorator line once sat here and was lost - confirm against
    # the upstream file before relying on call-style access.
    # type: ignore[no-redef,misc]
    def values(self) -> List[Tuple[int, Any]]:
        """Return all ``(key, value)`` pairs of the tree ordered by key.

        When ``settings.STRICT`` is set, the pairs must already be in
        non-decreasing key order and ``PDFSyntaxError`` is raised otherwise.
        When it is not set, the pairs are sorted by key before being
        returned.
        """
        entries = self._parse()

        if not settings.STRICT:
            # Lenient mode: repair the ordering instead of rejecting it.
            entries.sort(key=lambda pair: pair[0])
            return entries

        # Strict mode: any adjacent pair whose keys decrease is an error.
        misordered = any(
            following[0] < current[0]
            for current, following in zip(entries, entries[1:])
        )
        if misordered:
            raise PDFSyntaxError("Number tree elements are out of order")

        return entries