CR7CAD committed on
Commit
fda9c54
·
verified ·
1 Parent(s): 885b5d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -22
app.py CHANGED
@@ -1,9 +1,8 @@
1
  import os
2
  from flask import Flask, request, jsonify
3
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
4
  from werkzeug.utils import secure_filename
 
5
  from pdf2image import convert_from_path
6
- import pytesseract
7
  from PIL import Image
8
 
9
  # Initialize Flask app
@@ -14,24 +13,36 @@ UPLOAD_FOLDER = 'uploads'
14
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
15
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
16
 
17
- # Load AI Pipelines
18
- ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-small-printed") # OCR Model
19
- text_gen_pipeline = pipeline("text-generation", model="gpt2") # GPT-2 for text generation
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- # Function to extract text from a PDF resume
22
  def extract_text_from_pdf(pdf_path):
 
23
  images = convert_from_path(pdf_path)
24
  extracted_text = ""
25
 
26
  for img in images:
27
- text = pytesseract.image_to_string(img) # OCR extraction
28
  extracted_text += text + "\n"
29
 
30
  return extracted_text.strip()
31
 
32
- # Route: Upload Resume & Generate Report
33
  @app.route('/upload', methods=['POST'])
34
- def upload_resume():
 
35
  if 'file' not in request.files:
36
  return jsonify({"error": "No file uploaded"}), 400
37
 
@@ -39,25 +50,21 @@ def upload_resume():
39
  if file.filename == '':
40
  return jsonify({"error": "No file selected"}), 400
41
 
 
 
 
42
  # Save uploaded file
43
  filename = secure_filename(file.filename)
44
  file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
45
  file.save(file_path)
46
 
47
- # Extract text from PDF
48
- extracted_text = extract_text_from_pdf(file_path)
49
-
50
- # Generate AI evaluation
51
- prompt = f"Candidate Resume: {extracted_text}\n\nEvaluate the suitability of this candidate for a software engineering role at Google."
52
- ai_evaluation = text_gen_pipeline(prompt, max_length=150, num_return_sequences=1)[0]["generated_text"]
53
-
54
- # Return response
55
- response = {
56
- "resume_text": extracted_text[:1000], # Limit to 1000 chars for display
57
- "ai_evaluation": ai_evaluation
58
- }
59
 
60
- return jsonify(response)
61
 
62
  # Run Flask App
63
  if __name__ == '__main__':
 
1
  import os
2
  from flask import Flask, request, jsonify
 
3
  from werkzeug.utils import secure_filename
4
+ from transformers import pipeline
5
  from pdf2image import convert_from_path
 
6
  from PIL import Image
7
 
8
  # Initialize Flask app
 
13
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
14
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
15
 
16
+ # Allowed file extensions
17
+ ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'pdf'}
18
+
19
+ # Load TrOCR Model
20
+ ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-small-printed")
21
+
22
def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared
    case-insensitively; a name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    _, extension = filename.rsplit('.', 1)
    return extension.lower() in ALLOWED_EXTENSIONS
25
+
26
def extract_text_from_image(image_path):
    """Extract text from a single image using TrOCR.

    Args:
        image_path: Either something ``Image.open`` can read (a filesystem
            path or a binary file object) or an already-loaded
            ``PIL.Image.Image``, such as a page image rendered from a PDF.

    Returns:
        The ``generated_text`` field of the first result returned by
        ``ocr_pipeline``.
    """
    if isinstance(image_path, Image.Image):
        # Already decoded in memory (e.g. a PDF page): Image.open() would
        # fail on it, so only normalise the colour mode.
        image = image_path.convert("RGB")
    else:
        # convert() returns an independent, fully loaded copy, so the
        # underlying file can be closed as soon as the block exits.
        with Image.open(image_path) as opened:
            image = opened.convert("RGB")
    text = ocr_pipeline(image)[0]['generated_text']
    return text
31
 
 
32
  def extract_text_from_pdf(pdf_path):
33
+ """Convert PDF to images and extract text from each page."""
34
  images = convert_from_path(pdf_path)
35
  extracted_text = ""
36
 
37
  for img in images:
38
+ text = extract_text_from_image(img)
39
  extracted_text += text + "\n"
40
 
41
  return extracted_text.strip()
42
 
 
43
  @app.route('/upload', methods=['POST'])
44
+ def upload_file():
45
+ """Handle file upload and text extraction."""
46
  if 'file' not in request.files:
47
  return jsonify({"error": "No file uploaded"}), 400
48
 
 
50
  if file.filename == '':
51
  return jsonify({"error": "No file selected"}), 400
52
 
53
+ if not allowed_file(file.filename):
54
+ return jsonify({"error": "Invalid file type. Allowed: PNG, JPG, JPEG, PDF."}), 400
55
+
56
  # Save uploaded file
57
  filename = secure_filename(file.filename)
58
  file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
59
  file.save(file_path)
60
 
61
+ # Process image or PDF
62
+ if filename.lower().endswith(".pdf"):
63
+ extracted_text = extract_text_from_pdf(file_path)
64
+ else:
65
+ extracted_text = extract_text_from_image(file_path)
 
 
 
 
 
 
 
66
 
67
+ return jsonify({"extracted_text": extracted_text})
68
 
69
  # Run Flask App
70
  if __name__ == '__main__':