CR7CAD committed on
Commit
885b5d3
·
verified ·
1 Parent(s): bbdfbb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  from flask import Flask, request, jsonify
3
- from transformers import pipeline
4
  from werkzeug.utils import secure_filename
5
  from pdf2image import convert_from_path
6
  import pytesseract
@@ -16,17 +16,17 @@ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
16
 
17
  # Load AI Pipelines
18
  ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-small-printed") # OCR Model
19
- text_gen_pipeline = pipeline("text-generation", model="distilbert/distilgpt2") # Text Generation Model
20
 
21
  # Function to extract text from a PDF resume
22
  def extract_text_from_pdf(pdf_path):
23
  images = convert_from_path(pdf_path)
24
  extracted_text = ""
25
-
26
  for img in images:
27
  text = pytesseract.image_to_string(img) # OCR extraction
28
  extracted_text += text + "\n"
29
-
30
  return extracted_text.strip()
31
 
32
  # Route: Upload Resume & Generate Report
@@ -34,11 +34,11 @@ def extract_text_from_pdf(pdf_path):
34
  def upload_resume():
35
  if 'file' not in request.files:
36
  return jsonify({"error": "No file uploaded"}), 400
37
-
38
  file = request.files['file']
39
  if file.filename == '':
40
  return jsonify({"error": "No file selected"}), 400
41
-
42
  # Save uploaded file
43
  filename = secure_filename(file.filename)
44
  file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
@@ -56,7 +56,7 @@ def upload_resume():
56
  "resume_text": extracted_text[:1000], # Limit to 1000 chars for display
57
  "ai_evaluation": ai_evaluation
58
  }
59
-
60
  return jsonify(response)
61
 
62
  # Run Flask App
 
1
  import os
2
  from flask import Flask, request, jsonify
3
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
4
  from werkzeug.utils import secure_filename
5
  from pdf2image import convert_from_path
6
  import pytesseract
 
16
 
17
  # Load AI Pipelines
18
  ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-small-printed") # OCR Model
19
+ text_gen_pipeline = pipeline("text-generation", model="gpt2") # GPT-2 for text generation
20
 
21
  # Function to extract text from a PDF resume
22
  def extract_text_from_pdf(pdf_path):
23
  images = convert_from_path(pdf_path)
24
  extracted_text = ""
25
+
26
  for img in images:
27
  text = pytesseract.image_to_string(img) # OCR extraction
28
  extracted_text += text + "\n"
29
+
30
  return extracted_text.strip()
31
 
32
  # Route: Upload Resume & Generate Report
 
34
  def upload_resume():
35
  if 'file' not in request.files:
36
  return jsonify({"error": "No file uploaded"}), 400
37
+
38
  file = request.files['file']
39
  if file.filename == '':
40
  return jsonify({"error": "No file selected"}), 400
41
+
42
  # Save uploaded file
43
  filename = secure_filename(file.filename)
44
  file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
 
56
  "resume_text": extracted_text[:1000], # Limit to 1000 chars for display
57
  "ai_evaluation": ai_evaluation
58
  }
59
+
60
  return jsonify(response)
61
 
62
  # Run Flask App