"""Flask service that extracts text from uploaded images and PDFs using TrOCR."""

import os

from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
from transformers import pipeline
from pdf2image import convert_from_path
from PIL import Image

# Initialize Flask app
app = Flask(__name__)

# Set upload folder (created at import time so the first request can save into it)
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Allowed file extensions (compared case-insensitively)
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'pdf'}

# Load TrOCR model once at import time; every request reuses this pipeline.
ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-small-printed")


def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def extract_text_from_image(image_path):
    """Extract text from a single image using the module-level OCR pipeline.

    Args:
        image_path: Either a filesystem path to an image file or an
            already-loaded ``PIL.Image.Image`` (as produced when rendering
            PDF pages).

    Returns:
        The ``generated_text`` of the first pipeline result.
    """
    if isinstance(image_path, Image.Image):
        # Already decoded (e.g. a rendered PDF page): Image.open() would fail
        # on it, so only normalize the colour mode.
        image = image_path.convert("RGB")
    else:
        # convert() returns a new, fully loaded image, so the underlying file
        # handle can be closed as soon as the with-block exits.
        with Image.open(image_path) as opened:
            image = opened.convert("RGB")
    return ocr_pipeline(image)[0]['generated_text']


def extract_text_from_pdf(pdf_path):
    """Convert a PDF to page images and extract text from each page.

    Args:
        pdf_path: Filesystem path of the PDF to process.

    Returns:
        The per-page texts joined with newlines, with leading and trailing
        whitespace stripped.
    """
    pages = convert_from_path(pdf_path)
    return "\n".join(extract_text_from_image(page) for page in pages).strip()


@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle a file upload and return the extracted text as JSON.

    Expects a multipart form field named ``file``. Responds with
    ``{"extracted_text": ...}`` on success, a 400 JSON error for a missing,
    unnamed or disallowed file, and a 500 JSON error if extraction fails.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No file selected"}), 400
    if not allowed_file(file.filename):
        return jsonify({"error": "Invalid file type. Allowed: PNG, JPG, JPEG, PDF."}), 400

    # Validated above, so this is one of ALLOWED_EXTENSIONS and safe to reuse.
    extension = file.filename.rsplit('.', 1)[1].lower()

    # Save uploaded file. secure_filename() may strip the whole stem (e.g.
    # ".pdf" or a non-ASCII name becomes "pdf"), which would drop the
    # extension; fall back to a generic name that keeps the validated one.
    # NOTE: uploads sharing a name overwrite each other in UPLOAD_FOLDER.
    filename = secure_filename(file.filename)
    if not allowed_file(filename):
        filename = f"upload.{extension}"
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)

    # Process image or PDF. Dispatch on the validated extension rather than
    # on the sanitized name so a PDF is never routed to the image path.
    try:
        if extension == "pdf":
            extracted_text = extract_text_from_pdf(file_path)
        else:
            extracted_text = extract_text_from_image(file_path)
    except Exception:
        # Request boundary: log the traceback and keep the API's JSON contract
        # instead of letting an HTML error page reach the client.
        app.logger.exception("Text extraction failed for %s", file_path)
        return jsonify({"error": "Failed to extract text from the uploaded file"}), 500

    return jsonify({"extracted_text": extracted_text})


# Run Flask App
if __name__ == '__main__':
    # The interactive debugger allows arbitrary code execution, so it must not
    # be on by default while listening on all interfaces; opt in explicitly
    # with FLASK_DEBUG=1 during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)