Update docxtoimage.py
Browse files- docxtoimage.py +47 -44
docxtoimage.py
CHANGED
@@ -1,45 +1,48 @@
|
|
1 |
-
import os
|
2 |
-
from spire.doc import *
|
3 |
-
from spire.doc.common import *
|
4 |
-
|
5 |
-
def process(folder_path,max_page):
|
6 |
-
for filename in os.listdir(folder_path):
|
7 |
-
if filename.endswith(".docx"):
|
8 |
-
process_docx(folder_path, filename,max_page)
|
9 |
-
|
10 |
-
def process_docx(folder_path, filename,max_page
|
11 |
-
try:
|
12 |
-
# Construct the full file path
|
13 |
-
file_path = os.path.join(folder_path, filename)
|
14 |
-
|
15 |
-
# Process the docx file
|
16 |
-
document = Document()
|
17 |
-
document.LoadFromFile(file_path)
|
18 |
-
if max_page
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
#
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
45 |
process(folder_path,max_page)
|
|
|
1 |
+
import os
|
2 |
+
from spire.doc import *
|
3 |
+
from spire.doc.common import *
|
4 |
+
|
5 |
+
def process(folder_path, max_page):
    """Convert every Word document found directly in *folder_path* to images.

    Args:
        folder_path: Directory whose entries are scanned (sub-directories are
            not descended into) for files ending in ``.docx``.
        max_page: Maximum number of pages to render per document, or ``None``
            to render every page. Passed through to ``process_docx`` unchanged.
    """
    # os.listdir gives no ordering guarantee; sort so runs are reproducible.
    for filename in sorted(os.listdir(folder_path)):
        # Word drops "~$name.docx" owner/lock files next to documents that are
        # open; they match the extension but are not loadable documents.
        if filename.startswith("~$"):
            continue
        # Compare case-insensitively so "REPORT.DOCX" is not silently ignored.
        if filename.lower().endswith(".docx"):
            process_docx(folder_path, filename, max_page)
|
9 |
+
|
10 |
+
def process_docx(folder_path, filename, max_page):
    """Render the pages of one ``.docx`` file to image files.

    Images are written to ``<folder_path>/<stem>/<stem>_<n>.png``, where
    ``<stem>`` is *filename* without its extension and ``<n>`` counts from 1.
    Any exception is caught and reported with ``print`` so that one bad file
    does not abort a batch run.

    Args:
        folder_path: Directory that contains the document; the output folder
            is created inside it.
        filename: Name of the document inside *folder_path*.
        max_page: Upper bound on the number of pages rendered (starting at the
            first page), or ``None`` to render all pages.
    """
    try:
        # Construct the full file path
        file_path = os.path.join(folder_path, filename)

        document = Document()
        try:
            document.LoadFromFile(file_path)

            # Never request more pages than the document actually has.
            total_pages = document.GetPageCount()
            if max_page is None:
                page_count = total_pages
            else:
                page_count = min(max_page, total_pages)
            image_streams = document.SaveImageToStreams(0, page_count, ImageType.Bitmap)

            # Extract the filename without extension
            file_name, _ = os.path.splitext(filename)

            # Create the folder path to save images
            image_folder_path = os.path.join(folder_path, file_name)
            os.makedirs(image_folder_path, exist_ok=True)

            # Save each image stream to its own numbered .png file
            for page_number, image in enumerate(image_streams, start=1):
                image_name = os.path.join(image_folder_path, f"{file_name}_{page_number}.png")
                with open(image_name, "wb") as image_file:
                    image_file.write(image.ToArray())
        finally:
            # Release the document even when rendering or writing fails.
            document.Close()
    except Exception as e:
        # Per-file boundary: report and continue with the next document.
        print(f"Error processing file {filename}: {e}")
|
42 |
+
|
43 |
+
|
44 |
+
if __name__ == "__main__":
    # Script entry point: convert the documents in the "input" folder,
    # passing None as the page limit.
    folder_path = "input"
    max_page = None
    process(folder_path, max_page)
|