|
from flask import Flask, request, jsonify, send_file |
|
from flask_cors import CORS |
|
import os |
|
import tempfile |
|
from pdf2docx import Converter |
|
from werkzeug.utils import secure_filename |
|
|
|
# Application object; CORS is enabled with no options passed to flask_cors.
app = Flask(__name__)
CORS(app)

# Upper bound on the size of an incoming request body: 16 MiB.
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024

# Working directories live under the system temporary directory:
# uploaded PDFs go in one folder, generated DOCX files in the other.
_TEMP_ROOT = tempfile.gettempdir()
UPLOAD_FOLDER = os.path.join(_TEMP_ROOT, "pdf_uploads")
CONVERTED_FOLDER = os.path.join(_TEMP_ROOT, "pdf_converted")

# Create both folders at import time; an existing folder is not an error.
for _folder in (UPLOAD_FOLDER, CONVERTED_FOLDER):
    os.makedirs(_folder, exist_ok=True)
|
ALLOWED_EXTENSIONS = {'pdf'}


def allowed_file(filename):
    """Tell whether *filename* carries an accepted extension.

    The extension is the text after the last ``.`` and is compared
    case-insensitively against ``ALLOWED_EXTENSIONS``.

    Args:
        filename: The file name to inspect.

    Returns:
        True if the name contains a dot and its extension is allowed,
        otherwise False.
    """
    # A name without any dot has no extension to validate.
    if '.' not in filename:
        return False

    extension = filename.rsplit('.', 1)[-1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
|
|
def cleanup_file(filepath):
    """Delete *filepath* on a best-effort basis.

    A path that does not exist is ignored silently. Any other failure is
    reported on stdout as a warning and swallowed, so the function never
    raises and is safe to call from ``except``/``finally`` cleanup paths.

    Args:
        filepath: Path of the file to remove.
    """
    try:
        # Remove directly rather than testing os.path.exists() first: the
        # check-then-remove sequence can race with another deleter, and
        # exists() reports False for a broken symlink that should still go.
        os.remove(filepath)
    except (FileNotFoundError, NotADirectoryError):
        # Nothing at that path - there is nothing to clean up.
        pass
    except Exception as e:
        # Deliberately broad: cleanup must never mask the caller's own error.
        print(f"Warning: Could not remove file {filepath}: {e}")
|
|
|
@app.route("/", methods=["GET"])
def index():
    """Return a plain-text banner confirming the service is running."""
    # U+2705 is the check-mark emoji. It is written as an escape so the
    # literal stays intact even if the file is re-saved in a non-UTF-8
    # encoding (the raw character had been mangled into a broken literal).
    return "\u2705 PDF to DOCX Converter is running!"
|
|
|
@app.route("/health", methods=["GET"])
def health_check():
    """Report a fixed "healthy" status and the service name as JSON."""
    payload = {
        "status": "healthy",
        "service": "PDF to DOCX Converter",
    }
    return jsonify(payload)
|
|
|
@app.route("/convert", methods=["POST"])
def convert_pdf_to_docx():
    """Convert an uploaded PDF to DOCX and return it as a download.

    The PDF is read from the ``file`` field of the request's uploaded files,
    saved under ``UPLOAD_FOLDER``, converted into ``CONVERTED_FOLDER`` and
    sent back as an attachment. Both temporary files are removed afterwards.

    Returns:
        On success, the DOCX file as an attachment named after the upload.
        Otherwise a JSON body ``{"error": ...}`` with status 400 (no file,
        no filename, non-PDF extension, or unusable filename) or 500
        (saving or converting failed).
    """
    # Function-scope import, used only for the per-request file-name token.
    import uuid

    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    pdf_file = request.files["file"]

    # ``not`` rejects both an empty and a missing (None) filename.
    if not pdf_file.filename:
        return jsonify({"error": "No file selected"}), 400

    if not allowed_file(pdf_file.filename):
        return jsonify({"error": "Only PDF files are allowed"}), 400

    filename = secure_filename(pdf_file.filename)
    if not filename:
        return jsonify({"error": "Invalid filename"}), 400

    # A random token instead of a one-second timestamp: two uploads of the
    # same filename within the same second must not share (and overwrite)
    # each other's temporary files.
    token = uuid.uuid4().hex
    pdf_path = os.path.join(UPLOAD_FOLDER, f"{token}_{filename}")
    docx_filename = os.path.splitext(filename)[0] + ".docx"
    docx_path = os.path.join(CONVERTED_FOLDER, f"{token}_{docx_filename}")

    try:
        pdf_file.save(pdf_path)

        cv = Converter(pdf_path)
        try:
            cv.convert(docx_path, start=0, end=None)
        finally:
            # Close the converter even when conversion raises.
            cv.close()

        if not os.path.exists(docx_path):
            raise RuntimeError("Conversion completed but output file not found")

        response = send_file(
            docx_path,
            as_attachment=True,
            download_name=docx_filename,
            mimetype='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        )
        # Remove the DOCX only once the response is closed; deleting it here
        # would pull the file out from under the download.
        response.call_on_close(lambda: cleanup_file(docx_path))
        return response

    except Exception as exc:
        # Drop any partial output; the uploaded PDF is handled in ``finally``.
        cleanup_file(docx_path)
        return jsonify({"error": f"Conversion failed: {exc}"}), 500

    finally:
        # The uploaded PDF is no longer needed on either path.
        cleanup_file(pdf_path)
|
|
|
@app.after_request
def cleanup_converted_files(response):
    """Pass every response through unchanged.

    NOTE(review): despite its name, this hook does not delete anything;
    its body only returns ``response``. No converted file is removed by
    this function.

    Args:
        response: The response object handed to the hook.

    Returns:
        The same ``response`` object, unmodified.
    """

    return response
|
|
|
@app.errorhandler(413)
def too_large(e):
    """Answer a 413 error with a JSON message stating the 16MB limit.

    Args:
        e: The error that triggered this handler (unused).
    """
    body = jsonify({"error": "File too large. Maximum size is 16MB"})
    return body, 413
|
|
|
@app.errorhandler(Exception)
def handle_exception(e):
    """Turn an otherwise unhandled exception into a JSON error response.

    HTTP errors (for example 404 for an unknown route or 405 for a wrong
    method) keep their own status code and headers; only the body is
    replaced with JSON. Everything else is reported as a 500.

    Args:
        e: The exception being handled.

    Returns:
        A JSON response of the form ``{"error": ...}``.
    """
    # Function-scope import so this handler carries its own dependency.
    from werkzeug.exceptions import HTTPException

    if isinstance(e, HTTPException):
        # A catch-all Exception handler also receives HTTP errors; without
        # this branch every 404/405 would be rewritten as a 500.
        response = e.get_response()
        response.data = jsonify({"error": e.description}).get_data()
        response.content_type = "application/json"
        return response

    return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
|
|
if __name__ == "__main__":
    # Show where temporary files are written before the server starts.
    startup_banner = (
        f"Upload folder: {UPLOAD_FOLDER}",
        f"Converted folder: {CONVERTED_FOLDER}",
    )
    for banner_line in startup_banner:
        print(banner_line)

    # Listen on all interfaces, port 7860, with the debugger disabled.
    app.run(host="0.0.0.0", port=7860, debug=False)