import os

from pdf2image import convert_from_path

# Pillow's writer names differ from the common file extensions for some
# formats: Image.save() rejects "JPG"/"TIF" and expects "JPEG"/"TIFF".
_PIL_FORMAT_ALIASES = {"jpg": "JPEG", "tif": "TIFF"}


def convert_pdf_to_images(pdf_path, output_format="png", max_pages=None):
    """Convert a single PDF file to one image per page.

    The images are written as ``page_<n>.<output_format>`` (1-based) into a
    folder named after the PDF (without its extension), created next to the
    PDF itself.

    Any exception raised during conversion or saving is caught and reported
    with ``print``; it is not re-raised, so a failure does not interrupt a
    caller that is processing several files.

    Args:
        pdf_path (str): Path to the PDF file.
        output_format (str, optional): Desired output format for images
            (default: "png"). Supported formats are "png", "jpg", and "ppm".
        max_pages (int, optional): Maximum number of pages to convert,
            counted from the first page (default: None). ``None`` or ``0``
            converts all pages.
    """
    try:
        # File name without directory or extension; used as the folder name.
        pdf_name, _ = os.path.splitext(os.path.basename(pdf_path))

        # last_page=None tells pdf2image to convert through the final page.
        images = convert_from_path(
            pdf_path,
            fmt=output_format,
            first_page=1,
            last_page=max_pages or None,
        )

        # Output folder sits next to the PDF and shares its base name.
        buffer_folder_path = os.path.join(os.path.dirname(pdf_path), pdf_name)
        os.makedirs(buffer_folder_path, exist_ok=True)

        # Translate extension-style names ("jpg") into the format name that
        # Pillow's save() actually recognises ("JPEG"); other formats are
        # simply upper-cased, as before.
        save_format = _PIL_FORMAT_ALIASES.get(
            output_format.lower(), output_format.upper()
        )

        for page_number, image in enumerate(images, start=1):
            image_path = os.path.join(
                buffer_folder_path, f"page_{page_number}.{output_format}"
            )
            image.save(image_path, save_format)
    except Exception as e:
        # Deliberate best-effort: report the failure and carry on.
        print(f"Error converting {pdf_path}: {e}")


def convert_pdfs(pdf_folder_path, output_format="png", max_pages=None):
    """Convert all PDF files in a folder to images, one file after another.

    Only direct entries of ``pdf_folder_path`` are considered (no recursion).
    Files are matched by a case-insensitive ``.pdf`` extension and processed
    in sorted name order. Each file is handled by ``convert_pdf_to_images``,
    which reports its own errors, so one bad PDF does not stop the rest.

    Args:
        pdf_folder_path (str): Path to the folder containing PDF files.
        output_format (str, optional): Desired output format for images
            (default: "png"). Supported formats are "png", "jpg", and "ppm".
        max_pages (int, optional): Maximum number of pages to convert per PDF
            (default: None, all pages).
    """
    for filename in sorted(os.listdir(pdf_folder_path)):
        # Case-insensitive so that e.g. "REPORT.PDF" is not skipped.
        if not filename.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(pdf_folder_path, filename)
        convert_pdf_to_images(pdf_path, output_format, max_pages)


# Example usage
# convert_pdfs("input", output_format="png", max_pages=2)  # Convert PDFs to PNG, keeping only the first 2 pages