|
from pdf2image import convert_from_path |
|
import os |
|
import shutil |
|
|
|
class PdfManager:
    """Render the pages of a PDF to PNG files under ``pages/<id>``."""

    def __init__(self):
        pass

    def clear_and_recreate_dir(self, output_folder):
        """Make sure ``output_folder`` exists and is empty.

        Any existing directory at that path is removed together with its
        contents, and a fresh empty directory (including missing parents)
        is then created in its place.

        Args:
            output_folder: Path of the directory to reset.
        """
        print(f"Clearing output folder {output_folder}")

        if os.path.exists(output_folder):
            shutil.rmtree(output_folder)

        # Always recreate: previously the directory was only created when it
        # did not exist yet, so a pre-existing folder was deleted and left
        # missing, and the subsequent image saves failed.
        os.makedirs(output_folder)

    def save_images(self, id, pdf_path, max_pages, pages: "list[int] | None" = None) -> "list[str]":
        """Convert a PDF to PNG images and return the paths that were written.

        The output folder ``pages/<id>`` is wiped and recreated before any
        image is saved. Files are named ``page_<n>.png`` where ``n`` is the
        1-based position of the page in the converted document.

        Args:
            id: Identifier used to build the output folder name.
            pdf_path: Path of the PDF handed to ``convert_from_path``.
            max_pages: Maximum number of pages to save; a falsy value
                (``None`` or ``0``) means no limit.
            pages: Optional page selection. Entries are compared against the
                0-based index of each converted page; a falsy value
                (``None`` or an empty list) selects every page.

        Returns:
            The paths of the PNG files actually saved, in page order.
        """
        output_folder = f"pages/{id}"
        images = convert_from_path(pdf_path)

        print(f"Saving images from {pdf_path} to {output_folder}. Max pages: {max_pages}")

        self.clear_and_recreate_dir(output_folder)

        # Build the lookup once so the per-page membership test is O(1).
        wanted = set(pages) if pages else None

        saved_paths = []
        for i, image in enumerate(images):
            if max_pages and len(saved_paths) >= max_pages:
                break

            if wanted is not None and i not in wanted:
                continue

            full_save_path = f"{output_folder}/page_{i + 1}.png"
            image.save(full_save_path, "PNG")
            saved_paths.append(full_save_path)

        # Return the real file names: with a page filter the saved files are
        # not necessarily page_1..page_N, so they cannot be reconstructed
        # from the count alone.
        return saved_paths
|
|