ORC / app.py
mike23415's picture
Update app.py
f3a52d3 verified
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import os
import tempfile
from pdf2docx import Converter
from werkzeug.utils import secure_filename
app = Flask(__name__)
CORS(app)

# Working directories live under the system temp directory for better
# cross-platform compatibility.
UPLOAD_FOLDER = os.path.join(tempfile.gettempdir(), "pdf_uploads")
CONVERTED_FOLDER = os.path.join(tempfile.gettempdir(), "pdf_converted")

# Ensure both directories exist at import time.
for _folder in (UPLOAD_FOLDER, CONVERTED_FOLDER):
    os.makedirs(_folder, exist_ok=True)

# Configuration: cap request bodies at 16MB and accept PDF uploads only.
app.config.update(MAX_CONTENT_LENGTH=16 * 1024 * 1024)
ALLOWED_EXTENSIONS = {'pdf'}
def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared
    case-insensitively. A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
def cleanup_file(filepath):
    """Delete *filepath* if present; never raise.

    Any failure while checking for or removing the file is reported with a
    printed warning instead of being propagated to the caller.
    """
    try:
        if not os.path.exists(filepath):
            return
        os.remove(filepath)
    except Exception as err:
        print(f"Warning: Could not remove file {filepath}: {err}")
@app.route("/", methods=["GET"])
def index():
    """Return a plain-text banner confirming the service is up."""
    # The leading character is the check-mark emoji (U+2705); it had been
    # mis-decoded into mojibake in the previous revision.
    return "✅ PDF to DOCX Converter is running!"
@app.route("/health", methods=["GET"])
def health_check():
    """Report service health as a small JSON document."""
    payload = {"status": "healthy", "service": "PDF to DOCX Converter"}
    return jsonify(payload)
@app.route("/convert", methods=["POST"])
def convert_pdf_to_docx():
    """Convert an uploaded PDF to DOCX and return it as an attachment.

    Expects a multipart form upload under the field name ``file``.

    Returns:
        The DOCX file as a download on success.
        A JSON ``{"error": ...}`` body with status 400 when the upload is
        missing, unnamed, not a ``.pdf``, or has an unusable filename.
        A JSON ``{"error": ...}`` body with status 500 when saving,
        converting or reading back the result fails.

    Both the saved upload and the converted file are removed from disk
    before the function returns, on success and on failure alike.
    """
    import io
    import uuid

    # Check if file is present
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    pdf_file = request.files["file"]

    # Check if file is selected (the filename may be empty or None)
    if not pdf_file.filename:
        return jsonify({"error": "No file selected"}), 400

    # Validate file type
    if not allowed_file(pdf_file.filename):
        return jsonify({"error": "Only PDF files are allowed"}), 400

    # Secure the filename
    filename = secure_filename(pdf_file.filename)
    if not filename:
        return jsonify({"error": "Invalid filename"}), 400

    # A random prefix keeps concurrent uploads of the same filename apart;
    # a second-resolution timestamp could collide.
    unique_prefix = uuid.uuid4().hex
    pdf_path = os.path.join(UPLOAD_FOLDER, f"{unique_prefix}_{filename}")

    # Generate DOCX filename
    docx_filename = os.path.splitext(filename)[0] + ".docx"
    docx_path = os.path.join(CONVERTED_FOLDER, f"{unique_prefix}_{docx_filename}")

    try:
        # Save uploaded file
        pdf_file.save(pdf_path)

        # Convert PDF to DOCX; always release the converter, even on failure
        cv = Converter(pdf_path)
        try:
            cv.convert(docx_path, start=0, end=None)
        finally:
            cv.close()

        # Check if conversion was successful
        if not os.path.exists(docx_path):
            raise RuntimeError("Conversion completed but output file not found")

        # Load the result into memory so the on-disk copy can be deleted
        # before the response is streamed to the client.
        with open(docx_path, "rb") as converted:
            docx_buffer = io.BytesIO(converted.read())

        return send_file(
            docx_buffer,
            as_attachment=True,
            download_name=docx_filename,
            mimetype='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )
    except Exception as e:
        return jsonify({"error": f"Conversion failed: {str(e)}"}), 500
    finally:
        # Runs on every exit path; cleanup_file itself never raises.
        cleanup_file(pdf_path)
        cleanup_file(docx_path)
@app.after_request
def cleanup_converted_files(response):
    """Return *response* unchanged.

    Placeholder after-request hook: despite its name, it does not delete
    any files. It only passes the response object through.
    """
    return response
@app.errorhandler(413)
def too_large(e):
    """Answer an HTTP 413 error with a JSON error body and status 413."""
    body = {"error": "File too large. Maximum size is 16MB"}
    return jsonify(body), 413
@app.errorhandler(Exception)
def handle_exception(e):
    """Turn otherwise unhandled exceptions into JSON error responses.

    HTTP errors (e.g. 404, 405) keep their own status code and headers and
    only have their body replaced with JSON; previously they were all
    reported as 500. Any other exception is logged and reported as a 500.
    """
    import json

    from werkzeug.exceptions import HTTPException

    # A handler registered for Exception also receives HTTPException
    # subclasses, so let those through with their real status.
    if isinstance(e, HTTPException):
        response = e.get_response()
        response.data = json.dumps({"error": e.description})
        response.content_type = "application/json"
        return response

    # Record the traceback; the client only sees the message.
    app.logger.exception("Unhandled exception while processing request")
    return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
if __name__ == "__main__":
    # Show where uploads and converted files are stored, then start the
    # server on all interfaces, port 7860, with debug mode off.
    print(
        f"Upload folder: {UPLOAD_FOLDER}",
        f"Converted folder: {CONVERTED_FOLDER}",
        sep="\n",
    )
    app.run(debug=False, port=7860, host="0.0.0.0")