sschet committed on
Commit
b94ac18
·
1 Parent(s): 3e57ee8
Files changed (1) hide show
  1. main.py +64 -0
main.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import re
4
+ import pandas as pd, numpy as np
5
+ import comtypes.client
6
+ import docx
7
+ from docx.document import Document
8
+ from docx.oxml.table import CT_Tbl
9
+ from docx.oxml.text.paragraph import CT_P
10
+ from docx.table import _Cell, Table
11
+ from docx.text.paragraph import Paragraph
12
+ from flask import Flask, request, send_file
13
+ from werkzeug.utils import secure_filename
14
+ import zipfile
15
+ import io
16
+
17
# WSGI application object; the route handlers in this module register on it.
app = Flask(__name__)

# Directory (relative to the process working directory) under which uploaded
# PDFs and the DOCX files converted from them are stored.
UPLOAD_FOLDER = 'uploads/'
# exist_ok=True replaces the separate os.path.exists() check, which could race
# with another process creating the directory between the check and the call.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
22
+
23
def _convert_with_word(pdf_paths):
    """Save each PDF in *pdf_paths* as a sibling ``.docx`` via Word automation.

    Args:
        pdf_paths: Absolute paths of the PDF files to convert. Each output is
            written next to its input with the extension replaced by
            ``.docx``.

    Any error raised by the Word automation calls propagates to the caller;
    the opened document and the Word instance are still closed.
    """
    # NOTE(review): when this runs on a non-main thread (e.g. a threaded WSGI
    # server), COM may need to be initialised for that thread first -- confirm
    # against the comtypes documentation for the deployment in use.
    word = comtypes.client.CreateObject('Word.Application')
    try:
        word.visible = 0
        for pdf_path in pdf_paths:
            docx_path = os.path.splitext(pdf_path)[0] + '.docx'
            wdoc = word.Documents.Open(pdf_path)
            try:
                # 16 is Word's wdFormatDocumentDefault (.docx) save format.
                wdoc.SaveAs2(docx_path, FileFormat=16)
            finally:
                # Close even if saving failed so Word does not keep the file.
                wdoc.Close()
    finally:
        # Always quit, otherwise a failed conversion leaves an orphaned,
        # invisible Word process behind.
        word.Quit()


@app.route('/convert', methods=['POST'])
def convert_pdfs():
    """Convert uploaded PDFs to DOCX with Word and return them as one zip.

    Expects a multipart POST whose ``pdf_files`` field carries one or more
    files. Parts without a usable ``.pdf`` filename (after sanitising with
    ``secure_filename``) are ignored. Each request works in its own scratch
    directory under ``UPLOAD_FOLDER`` so that the response contains only the
    documents converted from this request, and the directory is removed
    afterwards.

    Returns:
        The ``converted_docx.zip`` attachment on success, or a plain-text
        message with status 400 when the field is missing or carries no
        usable PDF.
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    import tempfile

    if 'pdf_files' not in request.files:
        return 'No file part', 400

    # Browsers submit an empty-named part when nothing was selected, and
    # secure_filename() may reduce a hostile name to ''; saving such a part
    # would target the directory itself, so skip them up front.
    uploads = []
    for pdf_file in request.files.getlist('pdf_files'):
        filename = secure_filename(pdf_file.filename or '')
        if filename.lower().endswith('.pdf'):
            uploads.append((pdf_file, filename))
    if not uploads:
        return 'No selected file', 400

    # Word needs absolute paths, hence the abspath() on the parent directory.
    work_dir = tempfile.mkdtemp(dir=os.path.abspath(UPLOAD_FOLDER))
    try:
        pdf_paths = []
        for pdf_file, filename in uploads:
            pdf_path = os.path.join(work_dir, filename)
            pdf_file.save(pdf_path)
            pdf_paths.append(pdf_path)

        _convert_with_word(pdf_paths)

        memory_file = io.BytesIO()
        with zipfile.ZipFile(memory_file, 'w', zipfile.ZIP_DEFLATED) as zf:
            for pdf_path in pdf_paths:
                docx_path = os.path.splitext(pdf_path)[0] + '.docx'
                if os.path.exists(docx_path):
                    # Store under the bare file name, without directories.
                    zf.write(docx_path, os.path.basename(docx_path))
    finally:
        # Best-effort cleanup: a lingering file lock must not fail the request.
        shutil.rmtree(work_dir, ignore_errors=True)

    memory_file.seek(0)
    try:
        # Flask >= 2.0 spelling; ``attachment_filename`` was removed in 2.2.
        return send_file(memory_file, download_name='converted_docx.zip', as_attachment=True)
    except TypeError:
        # Older Flask releases only know the original keyword.
        return send_file(memory_file, attachment_filename='converted_docx.zip', as_attachment=True)
62
+
63
if __name__ == '__main__':
    # Script entry point: serve the app on port 5000, bound to all interfaces.
    app.run(port=5000, host='0.0.0.0')