File size: 4,028 Bytes
dbf9148
40c3856
e4d75fe
f3a52d3
dbf9148
f3a52d3
e4d75fe
40c3856
 
 
f3a52d3
 
 
 
 
dbf9148
 
e4d75fe
f3a52d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f99efbd
 
 
 
f3a52d3
 
 
 
dbf9148
 
f3a52d3
dbf9148
 
f3a52d3
dbf9148
f3a52d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f99efbd
f3a52d3
 
 
 
f99efbd
 
 
f3a52d3
 
 
 
 
 
 
 
 
 
 
 
 
f99efbd
f3a52d3
 
 
f99efbd
f3a52d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4d75fe
f3a52d3
 
 
40c3856
dbf9148
f3a52d3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import os
import tempfile
from pdf2docx import Converter
from werkzeug.utils import secure_filename

# Flask application object, with CORS enabled through flask_cors.
app = Flask(__name__)
CORS(app)

# Both working directories live under the system temp directory so the
# service behaves the same across platforms.
_TEMP_ROOT = tempfile.gettempdir()
UPLOAD_FOLDER = os.path.join(_TEMP_ROOT, "pdf_uploads")
CONVERTED_FOLDER = os.path.join(_TEMP_ROOT, "pdf_converted")

# Make sure the working directories exist before any request arrives.
for _work_dir in (UPLOAD_FOLDER, CONVERTED_FOLDER):
    os.makedirs(_work_dir, exist_ok=True)

# Upload limits: 16MB maximum size, PDF extension only.
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
ALLOWED_EXTENSIONS = {'pdf'}

def allowed_file(filename):
    """Return whether *filename* ends in an allowed extension.

    Args:
        filename: Client-supplied file name. May be ``None`` or empty,
            in which case the file is rejected instead of raising.

    Returns:
        bool: True when the text after the last dot, lower-cased, is a
        member of ``ALLOWED_EXTENSIONS``; False otherwise.
    """
    # A missing name cannot have an extension; reject it up front so a
    # None filename does not blow up with a TypeError.
    if not filename:
        return False
    _stem, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot at all means no extension.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS

def cleanup_file(filepath):
    """Remove *filepath* on a best-effort basis.

    A file that is already gone is treated as success. Any other failure
    is reported with a printed warning and never propagated, so callers
    can use this safely inside ``except``/``finally`` blocks.

    Args:
        filepath: Path of the file to delete.

    Returns:
        None
    """
    try:
        # Remove directly rather than checking for existence first: the
        # file may disappear between the check and the removal.
        os.remove(filepath)
    except FileNotFoundError:
        # Already removed (for example by an earlier cleanup call).
        pass
    except Exception as e:
        # Deliberately broad: cleanup must never crash the request.
        print(f"Warning: Could not remove file {filepath}: {e}")

@app.route("/", methods=["GET"])
def index():
    """Serve a plain-text banner on the root URL."""
    banner = "✅ PDF to DOCX Converter is running!"
    return banner

@app.route("/health", methods=["GET"])
def health_check():
    """Return a fixed JSON payload reporting the service as healthy."""
    payload = {"status": "healthy", "service": "PDF to DOCX Converter"}
    return jsonify(payload)

@app.route("/convert", methods=["POST"])
def convert_pdf_to_docx():
    """Convert an uploaded PDF to DOCX and return it as an attachment.

    Expects a multipart form upload with the PDF in the ``file`` field.

    Returns:
        On success, the converted DOCX as a file download. On failure, a
        JSON body ``{"error": ...}`` with status 400 (missing, unnamed,
        non-PDF or unusable file name) or 500 (conversion failed).
    """
    # Function-scope import, like the original's local ``import time``.
    import uuid

    # Check if file is present
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    pdf_file = request.files["file"]

    # Check if file is selected (covers both an empty and a missing name)
    if not pdf_file.filename:
        return jsonify({"error": "No file selected"}), 400

    # Validate file type
    if not allowed_file(pdf_file.filename):
        return jsonify({"error": "Only PDF files are allowed"}), 400

    # Secure the filename
    filename = secure_filename(pdf_file.filename)
    if not filename:
        return jsonify({"error": "Invalid filename"}), 400

    # A random prefix keeps concurrent uploads of the same file name from
    # overwriting each other; a second-granularity timestamp could collide.
    unique_prefix = uuid.uuid4().hex
    pdf_path = os.path.join(UPLOAD_FOLDER, f"{unique_prefix}_{filename}")

    # Generate DOCX filename
    docx_filename = os.path.splitext(filename)[0] + ".docx"
    docx_path = os.path.join(CONVERTED_FOLDER, f"{unique_prefix}_{docx_filename}")

    try:
        # Save uploaded file
        pdf_file.save(pdf_path)

        # Convert PDF to DOCX; always close the converter, even when
        # the conversion itself raises.
        cv = Converter(pdf_path)
        try:
            cv.convert(docx_path, start=0, end=None)
        finally:
            cv.close()

        # Check if conversion was successful
        if not os.path.exists(docx_path):
            raise RuntimeError("Conversion completed but output file not found")

        response = send_file(
            docx_path,
            as_attachment=True,
            download_name=docx_filename,
            mimetype='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )
        # Delete the converted file once the response has been closed, so
        # successful conversions do not accumulate on disk.
        response.call_on_close(lambda: cleanup_file(docx_path))
        return response

    except Exception as e:
        # Drop any partial output; the upload is removed in ``finally``.
        # NOTE(review): the exception text is returned to the client and
        # may expose internal paths - consider a generic message.
        cleanup_file(docx_path)
        return jsonify({"error": f"Conversion failed: {str(e)}"}), 500

    finally:
        # The uploaded PDF is no longer needed on either path.
        cleanup_file(pdf_path)

@app.after_request
def cleanup_converted_files(response):
    """Pass-through ``after_request`` hook; currently performs no cleanup.

    Despite its name, this function deletes nothing: it returns
    *response* unchanged.

    Args:
        response: The response object handed to the hook by Flask.

    Returns:
        The same *response* object, unmodified.
    """
    # This is a simple cleanup - in production you might want a more sophisticated approach
    # NOTE(review): placeholder only. Before adding file deletion here,
    # confirm in the Flask docs when after_request hooks run relative to
    # the response body being transmitted.
    return response

@app.errorhandler(413)
def too_large(e):
    """Answer HTTP 413 errors with a JSON message stating the size limit."""
    body = jsonify({"error": "File too large. Maximum size is 16MB"})
    return body, 413

@app.errorhandler(Exception)
def handle_exception(e):
    """Turn otherwise unhandled exceptions into JSON error responses.

    HTTP errors (404, 405 and so on) keep their own status code and
    headers instead of being reported as 500; only genuinely unexpected
    exceptions produce a 500.

    Args:
        e: The exception raised while handling the request.

    Returns:
        A JSON response of the form ``{"error": ...}``.
    """
    # Function-scope import keeps this handler self-contained; werkzeug
    # is already a dependency of this module.
    from werkzeug.exceptions import HTTPException

    if isinstance(e, HTTPException):
        # Start from the exception's own response so the status line and
        # headers (e.g. ``Allow`` on a 405) survive, then swap in JSON.
        response = e.get_response()
        response.data = jsonify({"error": e.description or e.name}).get_data()
        response.content_type = "application/json"
        return response

    return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500

if __name__ == "__main__":
    # Print the working directories, then start the server on all
    # interfaces at port 7860 with debug mode off.
    print(
        f"Upload folder: {UPLOAD_FOLDER}",
        f"Converted folder: {CONVERTED_FOLDER}",
        sep="\n",
    )
    app.run(host="0.0.0.0", port=7860, debug=False)