File size: 1,662 Bytes
e2ff69c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b692870
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
from spire.doc import *
from spire.doc.common import *

def process(folder_path, max_page):
    """Convert every ``.docx`` entry found directly in ``folder_path``.

    Args:
        folder_path: Directory whose entries are scanned (not recursive).
        max_page: Page limit forwarded unchanged to ``process_docx``.
    """
    docx_names = (
        entry for entry in os.listdir(folder_path) if entry.endswith(".docx")
    )
    for entry in docx_names:
        process_docx(folder_path, entry, max_page)

def process_docx(folder_path, filename, max_page):
    """Render the leading pages of one .docx file to image files.

    The document at ``folder_path/filename`` is loaded and its pages are
    rendered with ``SaveImageToStreams``. Each rendered page is written to
    ``folder_path/<stem>/<stem>_<n>.png``, where ``<stem>`` is ``filename``
    without its extension and ``n`` starts at 1. The output folder is
    created if it does not exist.

    Args:
        folder_path: Directory that contains the document; the output
            folder is created inside it.
        filename: Name of the .docx file within ``folder_path``.
        max_page: Maximum number of pages to render, or ``None`` to render
            every page. Values larger than the document's page count are
            clamped to the page count.

    Returns:
        None. Any exception is caught and reported with ``print`` so that a
        single bad file does not abort a batch run.
    """
    try:
        document = Document()
        try:
            document.LoadFromFile(os.path.join(folder_path, filename))

            # Never request more pages than the document actually has.
            page_count = document.GetPageCount()
            if max_page is None:
                pages_to_render = page_count
            else:
                pages_to_render = min(max_page, page_count)
            image_streams = document.SaveImageToStreams(
                0, pages_to_render, ImageType.Bitmap
            )

            # Output folder is named after the document, minus its extension.
            file_name, _ = os.path.splitext(filename)
            image_folder_path = os.path.join(folder_path, file_name)
            os.makedirs(image_folder_path, exist_ok=True)

            # Write each rendered page to its own .png file (1-based suffix).
            for page_number, image in enumerate(image_streams, start=1):
                image_name = os.path.join(
                    image_folder_path, f"{file_name}_{page_number}.png"
                )
                with open(image_name, "wb") as image_file:
                    image_file.write(image.ToArray())
        finally:
            # Release the document even when loading or rendering fails.
            document.Close()
    except Exception as e:
        print(f"Error processing file {filename}: {e}")


if __name__ == "__main__":
    # Script entry point: convert every .docx in the "input" folder.
    # max_page=None means no page limit is applied.
    folder_path, max_page = "input", None
    process(folder_path, max_page)