|
import os
|
|
from pdf2image import convert_from_path
|
|
|
|
|
|
def convert_pdf_to_images(pdf_path, output_format="png", max_pages=None):
    """Convert a single PDF file to one image file per page.

    The images are written into a folder created next to the PDF and named
    after it (file name without extension). Pages are saved as
    ``page_<n>.<output_format>``, numbered from 1.

    Conversion is best-effort: any failure is reported with ``print`` and
    swallowed, so the function never raises and always returns ``None``.
    The output folder is only created after the PDF has been rendered, so a
    PDF that cannot be read leaves nothing behind.

    Args:
        pdf_path (str): Path to the PDF file.
        output_format (str, optional): Desired output format for images
            (default: "png"). Supported formats are "png", "jpg", and "ppm".
        max_pages (int, optional): Maximum number of pages to convert
            (default: None, all pages). Any falsy value (``None`` or ``0``)
            converts all pages.
    """
    try:
        # Pillow registers its JPEG writer under the name "JPEG" only;
        # passing "JPG" to Image.save() raises KeyError. The file extension
        # still uses the caller's spelling.
        save_format = output_format.upper()
        if save_format == "JPG":
            save_format = "JPEG"

        pdf_name, _ = os.path.splitext(os.path.basename(pdf_path))
        images = convert_from_path(
            pdf_path,
            fmt=output_format,
            first_page=1,
            last_page=max_pages or None,
        )

        buffer_folder_path = os.path.join(os.path.dirname(pdf_path), pdf_name)
        os.makedirs(buffer_folder_path, exist_ok=True)

        for page_number, image in enumerate(images, start=1):
            image_path = os.path.join(
                buffer_folder_path, f"page_{page_number}.{output_format}"
            )
            image.save(image_path, save_format)
    except Exception as e:
        # Deliberate catch-all: one unreadable PDF must not abort a batch run.
        print(f"Error converting {pdf_path}: {e}")
|
|
|
|
|
|
def convert_pdfs(pdf_folder_path, output_format="png", max_pages=None):
    """Convert all PDF files in a folder to images sequentially.

    Only the folder's direct entries are considered (no recursion). An entry
    is treated as a PDF when it is a regular file whose name ends in ".pdf",
    compared case-insensitively so that "REPORT.PDF" is picked up as well.
    Files are processed in sorted name order, one at a time, by calling
    ``convert_pdf_to_images`` for each.

    Args:
        pdf_folder_path (str): Path to the folder containing PDF files.
        output_format (str, optional): Desired output format for images
            (default: "png"). Supported formats are "png", "jpg", and "ppm".
        max_pages (int, optional): Maximum number of pages to convert per
            PDF (default: None, all pages).
    """
    # os.listdir() returns entries in arbitrary order; sort for reproducible runs.
    for filename in sorted(os.listdir(pdf_folder_path)):
        if not filename.lower().endswith(".pdf"):
            continue

        pdf_path = os.path.join(pdf_folder_path, filename)
        # A directory that merely has a ".pdf" suffix is not a document.
        if not os.path.isfile(pdf_path):
            continue

        convert_pdf_to_images(pdf_path, output_format, max_pages)
|
|
|
|
|
|
|
|
|
|
|