Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
from flask import Flask, request, jsonify
|
3 |
-
from transformers import pipeline
|
4 |
from werkzeug.utils import secure_filename
|
5 |
from pdf2image import convert_from_path
|
6 |
import pytesseract
|
@@ -16,17 +16,17 @@ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
|
|
16 |
|
17 |
# Load AI Pipelines
|
18 |
ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-small-printed") # OCR Model
|
19 |
-
text_gen_pipeline = pipeline("text-generation", model="
|
20 |
|
21 |
# Function to extract text from a PDF resume
|
22 |
def extract_text_from_pdf(pdf_path):
    """Return the OCR text of the PDF located at ``pdf_path``.

    The PDF is turned into images with ``convert_from_path``; every image
    is then run through ``pytesseract.image_to_string``. The per-image
    texts are joined with newlines and leading/trailing whitespace is
    stripped from the combined result.
    """
    page_texts = [
        pytesseract.image_to_string(page)  # OCR extraction
        for page in convert_from_path(pdf_path)
    ]
    return "\n".join(page_texts).strip()
|
31 |
|
32 |
# Route: Upload Resume & Generate Report
|
@@ -34,11 +34,11 @@ def extract_text_from_pdf(pdf_path):
|
|
34 |
def upload_resume():
|
35 |
if 'file' not in request.files:
|
36 |
return jsonify({"error": "No file uploaded"}), 400
|
37 |
-
|
38 |
file = request.files['file']
|
39 |
if file.filename == '':
|
40 |
return jsonify({"error": "No file selected"}), 400
|
41 |
-
|
42 |
# Save uploaded file
|
43 |
filename = secure_filename(file.filename)
|
44 |
file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
|
@@ -56,7 +56,7 @@ def upload_resume():
|
|
56 |
"resume_text": extracted_text[:1000], # Limit to 1000 chars for display
|
57 |
"ai_evaluation": ai_evaluation
|
58 |
}
|
59 |
-
|
60 |
return jsonify(response)
|
61 |
|
62 |
# Run Flask App
|
|
|
1 |
import os
|
2 |
from flask import Flask, request, jsonify
|
3 |
+
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
4 |
from werkzeug.utils import secure_filename
|
5 |
from pdf2image import convert_from_path
|
6 |
import pytesseract
|
|
|
16 |
|
17 |
# Load AI Pipelines
|
18 |
ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-small-printed") # OCR Model
|
19 |
+
text_gen_pipeline = pipeline("text-generation", model="gpt2") # GPT-2 for text generation
|
20 |
|
21 |
# Function to extract text from a PDF resume
|
22 |
def extract_text_from_pdf(pdf_path):
    """Return the OCR text of the PDF located at ``pdf_path``.

    The PDF is turned into images with ``convert_from_path``; every image
    is then run through ``pytesseract.image_to_string``. The per-image
    texts are joined with newlines and leading/trailing whitespace is
    stripped from the combined result.
    """
    page_texts = [
        pytesseract.image_to_string(page)  # OCR extraction
        for page in convert_from_path(pdf_path)
    ]
    return "\n".join(page_texts).strip()
|
31 |
|
32 |
# Route: Upload Resume & Generate Report
|
|
|
34 |
def upload_resume():
|
35 |
if 'file' not in request.files:
|
36 |
return jsonify({"error": "No file uploaded"}), 400
|
37 |
+
|
38 |
file = request.files['file']
|
39 |
if file.filename == '':
|
40 |
return jsonify({"error": "No file selected"}), 400
|
41 |
+
|
42 |
# Save uploaded file
|
43 |
filename = secure_filename(file.filename)
|
44 |
file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
|
|
|
56 |
"resume_text": extracted_text[:1000], # Limit to 1000 chars for display
|
57 |
"ai_evaluation": ai_evaluation
|
58 |
}
|
59 |
+
|
60 |
return jsonify(response)
|
61 |
|
62 |
# Run Flask App
|