mike23415 committed on
Commit
f3a52d3
·
verified ·
1 Parent(s): f99efbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -11
app.py CHANGED
@@ -1,40 +1,125 @@
1
  from flask import Flask, request, jsonify, send_file
2
  from flask_cors import CORS
3
  import os
 
4
  from pdf2docx import Converter
 
5
 
6
  app = Flask(__name__)
7
  CORS(app)
8
 
9
- UPLOAD_FOLDER = "/tmp/uploads"
10
- CONVERTED_FOLDER = "/tmp/converted"
 
 
 
11
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
12
  os.makedirs(CONVERTED_FOLDER, exist_ok=True)
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  @app.route("/", methods=["GET"])
15
  def index():
16
  return "✅ PDF to DOCX Converter is running!"
17
 
 
 
 
 
18
  @app.route("/convert", methods=["POST"])
19
  def convert_pdf_to_docx():
 
20
  if "file" not in request.files:
21
  return jsonify({"error": "No file uploaded"}), 400
22
-
23
  pdf_file = request.files["file"]
24
- pdf_path = os.path.join(UPLOAD_FOLDER, pdf_file.filename)
25
- pdf_file.save(pdf_path)
26
-
27
- docx_filename = os.path.splitext(pdf_file.filename)[0] + ".docx"
28
- docx_path = os.path.join(CONVERTED_FOLDER, docx_filename)
29
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  try:
 
 
 
 
31
  cv = Converter(pdf_path)
32
  cv.convert(docx_path, start=0, end=None)
33
  cv.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  except Exception as e:
 
 
 
35
  return jsonify({"error": f"Conversion failed: {str(e)}"}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- return send_file(docx_path, as_attachment=True)
 
 
38
 
39
  if __name__ == "__main__":
40
- app.run(host="0.0.0.0", port=7860)
 
 
 
1
  from flask import Flask, request, jsonify, send_file
2
  from flask_cors import CORS
3
  import os
4
+ import tempfile
5
  from pdf2docx import Converter
6
+ from werkzeug.utils import secure_filename
7
 
8
  app = Flask(__name__)
9
  CORS(app)
10
 
11
+ # Use system temp directory for better cross-platform compatibility
12
+ UPLOAD_FOLDER = os.path.join(tempfile.gettempdir(), "pdf_uploads")
13
+ CONVERTED_FOLDER = os.path.join(tempfile.gettempdir(), "pdf_converted")
14
+
15
+ # Create directories
16
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
17
  os.makedirs(CONVERTED_FOLDER, exist_ok=True)
18
 
19
+ # Configuration
20
+ app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size
21
+ ALLOWED_EXTENSIONS = {'pdf'}
22
+
23
def allowed_file(filename):
    """Return True when ``filename`` has an extension in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared
    case-insensitively. A name that contains no dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    return dot == '.' and extension.lower() in ALLOWED_EXTENSIONS
27
+
28
def cleanup_file(filepath):
    """Remove ``filepath`` on a best-effort basis.

    A file that is already gone is treated as success. Any other failure
    is reported on stdout as a warning and never raised, so callers can
    use this safely from error-handling and ``finally`` paths.
    """
    try:
        # Attempt the removal directly instead of checking existence first:
        # the file could disappear between the check and the removal.
        os.remove(filepath)
    except FileNotFoundError:
        # Nothing to clean up.
        pass
    except Exception as e:
        # Deliberately broad: cleanup must never mask the caller's result.
        print(f"Warning: Could not remove file {filepath}: {e}")
35
+
36
  @app.route("/", methods=["GET"])
37
  def index():
38
  return "✅ PDF to DOCX Converter is running!"
39
 
40
@app.route("/health", methods=["GET"])
def health_check():
    """Return a small JSON document with the service status and name."""
    payload = {"status": "healthy", "service": "PDF to DOCX Converter"}
    return jsonify(payload)
43
+
44
  @app.route("/convert", methods=["POST"])
45
  def convert_pdf_to_docx():
46
+ # Check if file is present
47
  if "file" not in request.files:
48
  return jsonify({"error": "No file uploaded"}), 400
49
+
50
  pdf_file = request.files["file"]
51
+
52
+ # Check if file is selected
53
+ if pdf_file.filename == '':
54
+ return jsonify({"error": "No file selected"}), 400
55
+
56
+ # Validate file type
57
+ if not allowed_file(pdf_file.filename):
58
+ return jsonify({"error": "Only PDF files are allowed"}), 400
59
+
60
+ # Secure the filename
61
+ filename = secure_filename(pdf_file.filename)
62
+ if not filename:
63
+ return jsonify({"error": "Invalid filename"}), 400
64
+
65
+ # Create unique paths to avoid conflicts
66
+ import time
67
+ timestamp = str(int(time.time()))
68
+ pdf_path = os.path.join(UPLOAD_FOLDER, f"{timestamp}_{filename}")
69
+
70
+ # Generate DOCX filename
71
+ docx_filename = os.path.splitext(filename)[0] + ".docx"
72
+ docx_path = os.path.join(CONVERTED_FOLDER, f"{timestamp}_{docx_filename}")
73
+
74
  try:
75
+ # Save uploaded file
76
+ pdf_file.save(pdf_path)
77
+
78
+ # Convert PDF to DOCX
79
  cv = Converter(pdf_path)
80
  cv.convert(docx_path, start=0, end=None)
81
  cv.close()
82
+
83
+ # Check if conversion was successful
84
+ if not os.path.exists(docx_path):
85
+ raise Exception("Conversion completed but output file not found")
86
+
87
+ # Send file and cleanup in finally block
88
+ return send_file(
89
+ docx_path,
90
+ as_attachment=True,
91
+ download_name=docx_filename,
92
+ mimetype='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
93
+ )
94
+
95
  except Exception as e:
96
+ # Cleanup on error
97
+ cleanup_file(pdf_path)
98
+ cleanup_file(docx_path)
99
  return jsonify({"error": f"Conversion failed: {str(e)}"}), 500
100
+
101
+ finally:
102
+ # Schedule cleanup of uploaded file (converted file cleaned up after send)
103
+ try:
104
+ cleanup_file(pdf_path)
105
+ except:
106
+ pass
107
+
108
@app.after_request
def cleanup_converted_files(response):
    """Return ``response`` unchanged.

    Registered as an after-request hook, but it is currently a
    placeholder: no files are removed here.
    """
    # Intentionally a pass-through; a real cleanup strategy would go here.
    return response
113
+
114
@app.errorhandler(413)
def too_large(e):
    """Return a JSON error body for HTTP 413 (request entity too large)."""
    body = jsonify({"error": "File too large. Maximum size is 16MB"})
    return body, 413
117
 
118
@app.errorhandler(Exception)
def handle_exception(e):
    """Convert unhandled, non-HTTP exceptions into a JSON 500 response.

    HTTP errors (404, 405, ...) are returned as-is so they keep their own
    status code; a handler registered for ``Exception`` would otherwise
    catch them too and report every one as a 500.
    """
    # Local import; werkzeug is already a dependency of this module.
    from werkzeug.exceptions import HTTPException

    if isinstance(e, HTTPException):
        return e

    return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
121
 
122
  if __name__ == "__main__":
123
+ print(f"Upload folder: {UPLOAD_FOLDER}")
124
+ print(f"Converted folder: {CONVERTED_FOLDER}")
125
+ app.run(host="0.0.0.0", port=7860, debug=False)