import os

from spire.doc import *
from spire.doc.common import *


def process(folder_path, max_page):
    """Render every .docx file found directly inside *folder_path* to images.

    Args:
        folder_path: Directory that is listed (non-recursively) for documents.
        max_page: Upper bound on the number of pages rendered per document,
            or None to render all pages.

    Raises:
        OSError: Propagated from ``os.listdir`` when *folder_path* cannot be
            listed (for example, when it does not exist).
    """
    for filename in os.listdir(folder_path):
        # Compare case-insensitively so "REPORT.DOCX" is picked up as well.
        if filename.lower().endswith(".docx"):
            process_docx(folder_path, filename, max_page)


def _pages_to_render(max_page, total_pages):
    """Return how many pages to render: all of them, or at most *max_page*."""
    if max_page is None:
        return total_pages
    return min(max_page, total_pages)


def process_docx(folder_path, filename, max_page):
    """Render the leading pages of one document into per-page image files.

    The images are written to a sub-folder of *folder_path* named after the
    document (file name without extension), as ``<name>_<page>.png`` with
    page numbers starting at 1.

    Any exception raised while handling the file is caught and reported on
    stdout, so one bad document does not stop a batch run.

    Args:
        folder_path: Directory containing the document; also the parent of
            the output folder.
        filename: Name of the document file inside *folder_path*.
        max_page: Maximum number of pages to render, or None for all pages.
            Values larger than the document's page count are clamped to it.
    """
    try:
        # Construct the full file path
        file_path = os.path.join(folder_path, filename)

        document = Document()
        try:
            document.LoadFromFile(file_path)

            page_count = _pages_to_render(max_page, document.GetPageCount())
            image_streams = document.SaveImageToStreams(
                0, page_count, ImageType.Bitmap
            )

            # Extract the filename without extension
            file_name, _ = os.path.splitext(filename)

            # Create the folder path to save images
            image_folder_path = os.path.join(folder_path, file_name)
            os.makedirs(image_folder_path, exist_ok=True)

            # Save each image stream to a PNG-named file, numbered from 1
            for i, image in enumerate(image_streams):
                image_name = os.path.join(
                    image_folder_path, f"{file_name}_{i+1}.png"
                )
                with open(image_name, 'wb') as image_file:
                    image_file.write(image.ToArray())
        finally:
            # Release the document even when loading, rendering or writing
            # fails part-way through.
            document.Close()
    except Exception as e:
        print(f"Error processing file {filename}: {e}")


if __name__ == '__main__':
    # Define the folder path
    folder_path = "input"
    # None means "render every page of each document"
    max_page = None
    process(folder_path, max_page)