import os
from spire.doc import *
from spire.doc.common import *

def process(folder_path, max_page):
    """Convert every Word document directly inside *folder_path* to images.

    The scan is not recursive. Each matching file is handed to
    ``process_docx`` together with *max_page*.

    Args:
        folder_path: Directory to scan for ``.docx`` files.
        max_page: Page limit forwarded unchanged to ``process_docx``.
    """
    # Sorted so documents are handled in a stable order on every platform.
    for filename in sorted(os.listdir(folder_path)):
        # Word owner/lock files ("~$name.docx") are not real documents.
        if filename.startswith("~$"):
            continue
        # Case-insensitive so that "REPORT.DOCX" is picked up as well.
        if not filename.lower().endswith(".docx"):
            continue
        # A directory can carry a ".docx" suffix; only regular files qualify.
        if not os.path.isfile(os.path.join(folder_path, filename)):
            continue
        process_docx(folder_path, filename, max_page)

def process_docx(folder_path, filename, max_page):
    """Render the leading pages of one Word document to image files.

    Output goes to ``<folder_path>/<stem>/<stem>_<n>.png``, where ``<stem>``
    is *filename* without its extension and ``<n>`` counts from 1. Any
    exception raised along the way is caught and reported on stdout, so a
    single bad document does not abort a batch run.

    Args:
        folder_path: Directory that contains the document; the output folder
            is created inside it.
        filename: Name of the document file within *folder_path*.
        max_page: Maximum number of pages to render, or ``None`` for every
            page. Values above the document's page count are clamped to it;
            values below 1 are reported as an error.
    """
    try:
        # Reject nonsensical limits before any document is opened.
        if max_page is not None and max_page < 1:
            raise ValueError(
                f"max_page must be a positive integer or None, got {max_page!r}"
            )

        # Construct the full file path
        file_path = os.path.join(folder_path, filename)

        document = Document()
        try:
            document.LoadFromFile(file_path)

            # Query the page count once and clamp the requested limit to it.
            total_pages = document.GetPageCount()
            if max_page is None:
                page_count = total_pages
            else:
                page_count = min(max_page, total_pages)
            image_streams = document.SaveImageToStreams(
                0, page_count, ImageType.Bitmap
            )

            # Extract the filename without extension
            file_name, _ = os.path.splitext(filename)

            # Create the folder path to save images
            image_folder_path = os.path.join(folder_path, file_name)
            os.makedirs(image_folder_path, exist_ok=True)

            # Save each image stream to a .png file, numbered from 1
            for page_number, image in enumerate(image_streams, start=1):
                image_name = os.path.join(
                    image_folder_path, f"{file_name}_{page_number}.png"
                )
                with open(image_name, 'wb') as image_file:
                    image_file.write(image.ToArray())
        finally:
            # Release the document even when loading or rendering failed.
            document.Close()
    except Exception as e:
        print(f"Error processing file {filename}: {e}")


if __name__ == "__main__":
    # Script entry point: no page limit, documents read from the "input" folder.
    max_page = None
    folder_path = "input"
    process(folder_path=folder_path, max_page=max_page)