"""Flask service that OCRs an uploaded PDF resume and generates an AI evaluation.

Exposes a single endpoint, ``POST /upload``, which expects a multipart form
field named ``file``. The PDF is rasterized with pdf2image, OCR'd with
Tesseract, and the extracted text is fed into a text-generation pipeline.
"""

import logging
import os
import uuid

from flask import Flask, request, jsonify
from transformers import pipeline
from werkzeug.utils import secure_filename
from pdf2image import convert_from_path
from pdf2image.exceptions import PDFPageCountError, PDFSyntaxError
import pytesseract
from PIL import Image

logger = logging.getLogger(__name__)

# Initialize Flask app
app = Flask(__name__)

# Set upload folder
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Load AI pipelines
# NOTE(review): ocr_pipeline is not used anywhere in this file (OCR goes
# through pytesseract). It is kept because other modules may import it --
# confirm and drop it if not, since loading it costs startup time and memory.
ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-small-printed")  # OCR Model
text_gen_pipeline = pipeline("text-generation", model="distilbert/distilgpt2")  # Text Generation Model

# Prompt template pieces (the resume text is inserted between them).
PROMPT_PREFIX = "Candidate Resume: "
PROMPT_SUFFIX = (
    "\n\nEvaluate the suitability of this candidate for a software "
    "engineering role at Google."
)

# Number of tokens the model may generate on top of the prompt.
MAX_NEW_TOKENS = 150
# Context window of the GPT-2 family (distilgpt2 uses 1024 positions).
MODEL_CONTEXT_TOKENS = 1024
# Slack for tokenization differences when the truncated resume is re-joined
# with the prompt prefix/suffix and tokenized again by the pipeline.
TOKEN_SAFETY_MARGIN = 16


def extract_text_from_pdf(pdf_path):
    """Return the OCR'd text of every page of the PDF at *pdf_path*.

    Each page is rendered to an image and passed through Tesseract; page
    texts are joined with newlines and surrounding whitespace is stripped.

    Raises:
        pdf2image.exceptions.PDFPageCountError / PDFSyntaxError: if the file
            cannot be read as a PDF (propagated from ``convert_from_path``).
    """
    pages = convert_from_path(pdf_path)
    page_texts = [pytesseract.image_to_string(page) for page in pages]
    return "\n".join(page_texts).strip()


def _build_prompt(resume_text):
    """Build the evaluation prompt, truncating the resume to fit the model.

    The resume is cut (from the end) so that prompt + generated tokens stay
    within ``MODEL_CONTEXT_TOKENS``; the instruction suffix is never cut.
    """
    tokenizer = text_gen_pipeline.tokenizer
    overhead = len(tokenizer.encode(PROMPT_PREFIX + PROMPT_SUFFIX))
    budget = MODEL_CONTEXT_TOKENS - MAX_NEW_TOKENS - overhead - TOKEN_SAFETY_MARGIN

    token_ids = tokenizer.encode(resume_text)
    if len(token_ids) > budget:
        # Only round-trip through the tokenizer when truncation is needed so
        # short resumes are passed through verbatim.
        resume_text = tokenizer.decode(token_ids[:budget])

    return f"{PROMPT_PREFIX}{resume_text}{PROMPT_SUFFIX}"


# Route: Upload Resume & Generate Report
@app.route('/upload', methods=['POST'])
def upload_resume():
    """Handle a resume upload and return extracted text plus an AI evaluation.

    Returns:
        JSON ``{"resume_text": ..., "ai_evaluation": ...}`` on success, or
        ``{"error": ...}`` with status 400 when no file is supplied or the
        upload cannot be read as a PDF.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No file selected"}), 400

    # secure_filename() can return "" (e.g. for names made only of unsafe
    # characters); fall back to a fixed name so the path is never a directory.
    safe_name = secure_filename(file.filename) or "upload.pdf"
    # A unique prefix keeps concurrent uploads with the same name from
    # overwriting each other.
    stored_name = f"{uuid.uuid4().hex}_{safe_name}"
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], stored_name)
    file.save(file_path)

    try:
        extracted_text = extract_text_from_pdf(file_path)
    except (PDFPageCountError, PDFSyntaxError):
        logger.exception("Could not read uploaded file %r as a PDF", safe_name)
        return jsonify({"error": "Could not read the uploaded file as a PDF"}), 400
    finally:
        # The upload is only an intermediate for OCR; remove it so resumes
        # do not accumulate on disk.
        try:
            os.remove(file_path)
        except OSError:
            logger.warning("Could not remove temporary upload %s", file_path)

    # Generate AI evaluation. max_new_tokens bounds only the generated part;
    # the previous max_length=150 also counted the (much longer) prompt.
    prompt = _build_prompt(extracted_text)
    generations = text_gen_pipeline(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        num_return_sequences=1,
    )
    ai_evaluation = generations[0]["generated_text"]

    response = {
        "resume_text": extracted_text[:1000],  # Limit to 1000 chars for display
        "ai_evaluation": ai_evaluation,
    }
    return jsonify(response)


# Run Flask App
if __name__ == '__main__':
    # The Werkzeug debugger allows arbitrary code execution, so it must not
    # be enabled by default on a server bound to all interfaces. Opt in
    # explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)