File size: 2,016 Bytes
b692870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
from pdf2image import convert_from_path


def convert_pdf_to_images(pdf_path, output_format="png", max_pages=None):
  """Render the pages of a single PDF to image files in a sibling folder.

  The images are written into a folder named after the PDF (without its
  extension), created next to the PDF, as ``page_<n>.<output_format>``
  with ``n`` starting at 1.

  Args:
      pdf_path (str): Path to the PDF file.
      output_format (str, optional): Image format, also used as the file
          extension (default: "png"). Supported formats are "png", "jpg",
          and "ppm".
      max_pages (int, optional): Maximum number of pages to convert
          (default: None, all pages). Any falsy value (e.g. 0) is treated
          as "all pages".

  Returns:
      None. Errors raised while converting or saving are caught and
      reported with ``print`` instead of being propagated.
  """
  try:
    # Pillow registers its JPEG writer under the name "JPEG" only, so a
    # plain upper-cased "JPG" makes Image.save fail. Translate the alias.
    normalized_format = output_format.lower()
    if normalized_format in ("jpg", "jpeg"):
      pil_format = "JPEG"
    else:
      pil_format = normalized_format.upper()

    pdf_name, _ = os.path.splitext(os.path.basename(pdf_path))  # Filename without extension
    images = convert_from_path(
        pdf_path,
        fmt=output_format,
        first_page=1,
        last_page=max_pages or None,  # None means "through the last page"
    )

    # The folder is created only after conversion succeeded, so a broken
    # PDF does not leave an empty folder behind.
    buffer_folder_path = os.path.join(os.path.dirname(pdf_path), pdf_name)
    os.makedirs(buffer_folder_path, exist_ok=True)

    for page_number, image in enumerate(images, start=1):
      image_path = os.path.join(buffer_folder_path, f"page_{page_number}.{output_format}")
      image.save(image_path, pil_format)

  except Exception as e:
    # Deliberate best-effort: one bad PDF must not abort a batch run.
    print(f"Error converting {pdf_path}: {e}")


def convert_pdfs(pdf_folder_path, output_format="png", max_pages=None):
  """Convert every PDF file directly inside a folder to images, one by one.

  Entries are processed in sorted name order, and the ".pdf" extension is
  matched case-insensitively so files such as "REPORT.PDF" are included.
  Sub-folders are not descended into.

  Args:
      pdf_folder_path (str): Path to the folder containing PDF files.
      output_format (str, optional): Desired output format for images
          (default: "png"). Supported formats are "png", "jpg", and "ppm".
      max_pages (int, optional): Maximum number of pages to convert per
          PDF (default: None, all pages).

  Returns:
      None.

  Raises:
      OSError: If ``pdf_folder_path`` cannot be listed (for example, it
          does not exist or is not a directory).
  """
  # os.listdir returns entries in arbitrary order; sorting makes runs
  # reproducible and matches the "sequential" promise of this function.
  for filename in sorted(os.listdir(pdf_folder_path)):
    if not filename.lower().endswith(".pdf"):
      continue
    pdf_path = os.path.join(pdf_folder_path, filename)
    convert_pdf_to_images(pdf_path, output_format, max_pages)


# Example usage
#convert_pdfs("input", output_format="png", max_pages=2)  # Convert PDFs to PNG, keeping only the first 2 pages