washeed committed on
Commit
e2ff69c
·
verified ·
1 Parent(s): b692870

Update docxtoimage.py

Browse files
Files changed (1) hide show
  1. docxtoimage.py +47 -44
docxtoimage.py CHANGED
@@ -1,45 +1,48 @@
1
- import os
2
- from spire.doc import *
3
- from spire.doc.common import *
4
-
5
- def process(folder_path,max_page):
6
- for filename in os.listdir(folder_path):
7
- if filename.endswith(".docx"):
8
- process_docx(folder_path, filename,max_page)
9
-
10
- def process_docx(folder_path, filename,max_page=None):
11
- try:
12
- # Construct the full file path
13
- file_path = os.path.join(folder_path, filename)
14
-
15
- # Process the docx file
16
- document = Document()
17
- document.LoadFromFile(file_path)
18
- if max_page>document.GetPageCount():
19
- image_streams = document.SaveImageToStreams(0,document.GetPageCount() ,ImageType.Bitmap)
20
- else:
21
- image_streams = document.SaveImageToStreams(0,max_page ,ImageType.Bitmap)
22
-
23
- # Extract the filename without extension
24
- file_name, _ = os.path.splitext(filename)
25
-
26
- # Create the folder path to save images
27
- image_folder_path = os.path.join(folder_path, file_name)
28
- os.makedirs(image_folder_path, exist_ok=True)
29
-
30
- # Save each image stream to a JPG file
31
- for i, image in enumerate(image_streams):
32
- image_name = os.path.join(image_folder_path, f"{file_name}_{i+1}.png")
33
- with open(image_name, 'wb') as image_file:
34
- image_file.write(image.ToArray())
35
-
36
- document.Close()
37
- except Exception as e:
38
- print(f"Error processing file {filename}: {e}")
39
-
40
-
41
- if __name__ == '__main__':
42
- # Define the folder path
43
- folder_path = "input"
44
- max_page=4
 
 
 
45
  process(folder_path,max_page)
 
1
+ import os
2
+ from spire.doc import *
3
+ from spire.doc.common import *
4
+
5
+ def process(folder_path,max_page):
6
+ for filename in os.listdir(folder_path):
7
+ if filename.endswith(".docx"):
8
+ process_docx(folder_path, filename,max_page)
9
+
10
+ def process_docx(folder_path, filename,max_page):
11
+ try:
12
+ # Construct the full file path
13
+ file_path = os.path.join(folder_path, filename)
14
+
15
+ # Process the docx file
16
+ document = Document()
17
+ document.LoadFromFile(file_path)
18
+ if max_page is not None:
19
+ if max_page>document.GetPageCount():
20
+ image_streams = document.SaveImageToStreams(0,document.GetPageCount() ,ImageType.Bitmap)
21
+ else:
22
+ image_streams = document.SaveImageToStreams(0,max_page ,ImageType.Bitmap)
23
+ if max_page is None:
24
+ max_page=document.GetPageCount
25
+ image_streams = document.SaveImageToStreams(0,document.GetPageCount() ,ImageType.Bitmap)
26
+ # Extract the filename without extension
27
+ file_name, _ = os.path.splitext(filename)
28
+
29
+ # Create the folder path to save images
30
+ image_folder_path = os.path.join(folder_path, file_name)
31
+ os.makedirs(image_folder_path, exist_ok=True)
32
+
33
+ # Save each image stream to a PNG file
34
+ for i, image in enumerate(image_streams):
35
+ image_name = os.path.join(image_folder_path, f"{file_name}_{i+1}.png")
36
+ with open(image_name, 'wb') as image_file:
37
+ image_file.write(image.ToArray())
38
+
39
+ document.Close()
40
+ except Exception as e:
41
+ print(f"Error processing file {filename}: {e}")
42
+
43
+
44
+ if __name__ == '__main__':
45
+ # Define the folder path
46
+ folder_path = "input"
47
+ max_page=None
48
  process(folder_path,max_page)