Spaces:
Runtime error
Runtime error
from PyPDF2 import PdfReader | |
class Paper(object): | |
def __init__(self, pdf_obj: PdfReader) -> None: | |
self._pdf_obj = pdf_obj | |
self._paper_meta = self._pdf_obj.metadata | |
def iter_pages(self, iter_text_len: int = 3000): | |
page_idx = 0 | |
for page in self._pdf_obj.pages: | |
txt = page.extract_text() | |
for i in range((len(txt) // iter_text_len) + 1): | |
yield page_idx, i, txt[i * iter_text_len:(i + 1) * iter_text_len] | |
page_idx += 1 | |
if __name__ == '__main__': | |
reader = PdfReader('../alexnet.pdf') | |
paper = Paper(reader) | |