# Page header captured from the hosting site's file view (not part of the program):
# CR7CAD's picture | Create app.py | cf8a522 verified | raw | history blame | 2.07 kB
import os
from flask import Flask, request, jsonify
from transformers import pipeline
from werkzeug.utils import secure_filename
from pdf2image import convert_from_path
import pytesseract
from PIL import Image
# --- Flask application -------------------------------------------------------
app = Flask(__name__)

# --- Upload directory --------------------------------------------------------
# The directory is created at import time (no error if it already exists) and
# its path is published through the Flask config for the request handlers.
UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# --- AI pipelines (instantiated once, at import time) ------------------------
# OCR model.
ocr_pipeline = pipeline(
    task="image-to-text",
    model="microsoft/trocr-small-printed",
)
# Text generation model.
text_gen_pipeline = pipeline(
    task="text-generation",
    model="distilbert/distilgpt2",
)
# Function to extract text from a PDF resume
def extract_text_from_pdf(pdf_path):
    """Convert the PDF at *pdf_path* to page images and OCR each one.

    Every page image is passed to ``pytesseract.image_to_string``; the
    per-page results are concatenated in page order, each followed by a
    newline, and the combined string is returned with leading and trailing
    whitespace stripped.
    """
    page_texts = [
        pytesseract.image_to_string(page) + "\n"  # OCR extraction
        for page in convert_from_path(pdf_path)
    ]
    return "".join(page_texts).strip()
# distilgpt2 can attend to at most 1024 tokens (prompt + generated text).
_MODEL_CONTEXT_TOKENS = 1024
# Number of tokens the model is allowed to generate for the evaluation.
_MAX_NEW_TOKENS = 150
# Tokens reserved for the fixed prompt wording plus re-tokenization slack.
_PROMPT_OVERHEAD_TOKENS = 64


def _fit_resume_to_model(resume_text):
    """Return *resume_text*, trimmed so the full prompt fits the model context.

    The text is tokenized with the generation pipeline's own tokenizer and cut
    to the token budget left after reserving room for the generated output and
    the fixed prompt wording. Text that already fits is returned unchanged.
    """
    budget = _MODEL_CONTEXT_TOKENS - _MAX_NEW_TOKENS - _PROMPT_OVERHEAD_TOKENS
    tokenizer = text_gen_pipeline.tokenizer
    token_ids = tokenizer.encode(resume_text, truncation=True, max_length=budget)
    if len(token_ids) < budget:
        # Nothing was cut; avoid a lossy encode/decode round trip.
        return resume_text
    return tokenizer.decode(token_ids, clean_up_tokenization_spaces=False)


# Route: Upload Resume & Generate Report
@app.route('/upload', methods=['POST'])
def upload_resume():
    """Accept a resume upload, OCR it, and return an AI-generated evaluation.

    Expects a multipart POST with the document in the ``file`` field.

    Returns:
        200 with JSON ``{"resume_text": ..., "ai_evaluation": ...}`` on
        success (``resume_text`` is limited to the first 1000 characters).
        400 with ``{"error": ...}`` when no file part or no filename is sent.
        422 with ``{"error": ...}`` when OCR yields no text at all.

    The upload is written to a uniquely named temporary file inside the upload
    folder and removed once text extraction finishes, so concurrent uploads
    that share a client filename cannot overwrite each other and resumes do
    not accumulate on disk.
    """
    import tempfile  # function-scope import: only this handler needs it

    if 'file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    uploaded = request.files['file']
    # ``filename`` may be empty or missing when the form field was left blank.
    if not uploaded.filename:
        return jsonify({"error": "No file selected"}), 400

    # Store under a server-generated name: the client-supplied name is never
    # used as a path component, which also covers names that sanitize to "".
    fd, file_path = tempfile.mkstemp(
        suffix='.pdf', dir=app.config['UPLOAD_FOLDER']
    )
    os.close(fd)
    try:
        uploaded.save(file_path)
        # Extract text from PDF
        extracted_text = extract_text_from_pdf(file_path)
    finally:
        os.remove(file_path)

    if not extracted_text:
        return jsonify(
            {"error": "No text could be extracted from the uploaded file"}
        ), 422

    # Generate AI evaluation. ``max_new_tokens`` bounds only the generated
    # part; the previous ``max_length=150`` also counted the prompt, which a
    # real resume exceeds on its own.
    resume_for_prompt = _fit_resume_to_model(extracted_text)
    prompt = f"Candidate Resume: {resume_for_prompt}\n\nEvaluate the suitability of this candidate for a software engineering role at Google."
    generations = text_gen_pipeline(
        prompt,
        max_new_tokens=_MAX_NEW_TOKENS,
        num_return_sequences=1,
    )
    ai_evaluation = generations[0]["generated_text"]

    # Return response
    response = {
        "resume_text": extracted_text[:1000],  # Limit to 1000 chars for display
        "ai_evaluation": ai_evaluation,
    }
    return jsonify(response)
# Run Flask App
if __name__ == '__main__':
    # The Werkzeug interactive debugger lets anyone who can reach the server
    # execute arbitrary Python, so it must not be enabled by default while
    # listening on all interfaces. Opt in for local development by setting
    # FLASK_DEBUG to 1/true/yes/on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)