Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ import os
|
|
8 |
|
9 |
# Function to login using Hugging Face API token
|
10 |
def login_with_token(hf_token):
|
11 |
-
"""Login to Hugging Face using provided token"""
|
12 |
try:
|
13 |
login(token=hf_token)
|
14 |
return "Logged in successfully!"
|
@@ -19,46 +19,51 @@ def login_with_token(hf_token):
|
|
19 |
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", framework="pt")
|
20 |
|
21 |
def extract_text_from_pdf(pdf_file):
|
22 |
-
"""Extracts text from a PDF file using pdfplumber"""
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
28 |
|
29 |
def parse_resume(pdf_file):
|
30 |
-
"""Parses the resume and extracts relevant information"""
|
31 |
# Extract text from PDF
|
32 |
resume_text = extract_text_from_pdf(pdf_file)
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
|
38 |
# Use the NER model to identify entities in the resume
|
39 |
entities = nlp(resume_text)
|
40 |
|
41 |
-
# Log the NER output for debugging
|
42 |
-
print("NER Output:")
|
43 |
-
print(entities)
|
44 |
-
|
45 |
# Initialize empty fields
|
46 |
name = email = phone = education = skills = experience = None
|
47 |
|
48 |
# Example parsing logic based on NER output
|
49 |
for entity in entities:
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
62 |
print(f"Parsed Info: Name={name}, Email={email}, Skills={skills}, Experience={experience}")
|
63 |
|
64 |
return {
|
@@ -71,22 +76,23 @@ def parse_resume(pdf_file):
|
|
71 |
}
|
72 |
|
73 |
def batch_process_resumes(pdf_files):
|
74 |
-
"""
|
75 |
all_resumes = []
|
76 |
for pdf_file in pdf_files:
|
77 |
resume_info = parse_resume(pdf_file)
|
78 |
|
79 |
# Only add the parsed resume info if there's meaningful data
|
80 |
-
if any(resume_info.values()):
|
81 |
all_resumes.append(resume_info)
|
82 |
|
|
|
|
|
|
|
|
|
|
|
83 |
# Convert to DataFrame
|
84 |
df = pd.DataFrame(all_resumes)
|
85 |
|
86 |
-
# If the DataFrame is empty, return a message indicating no data was found
|
87 |
-
if df.empty:
|
88 |
-
return "No valid resume information was parsed."
|
89 |
-
|
90 |
# Define the file path for the Excel file
|
91 |
output_file = "/tmp/parsed_resumes.xlsx"
|
92 |
|
@@ -120,10 +126,13 @@ with gr.Blocks() as demo:
|
|
120 |
# Attempt to log in with provided token
|
121 |
login_message = login_with_token(hf_token)
|
122 |
|
123 |
-
# If login is successful, process resumes and generate the download link
|
124 |
if "Error" not in login_message:
|
|
|
125 |
excel_file_path = batch_process_resumes(pdf_files)
|
126 |
-
|
|
|
|
|
|
|
127 |
else:
|
128 |
return login_message, None
|
129 |
|
|
|
8 |
|
9 |
# Function to login using Hugging Face API token
|
10 |
def login_with_token(hf_token):
|
11 |
+
"""Login to Hugging Face using provided token."""
|
12 |
try:
|
13 |
login(token=hf_token)
|
14 |
return "Logged in successfully!"
|
|
|
19 |
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", framework="pt")
|
20 |
|
21 |
def extract_text_from_pdf(pdf_file):
|
22 |
+
"""Extracts text from a PDF file using pdfplumber."""
|
23 |
+
try:
|
24 |
+
with pdfplumber.open(pdf_file.name) as pdf:
|
25 |
+
text = ''
|
26 |
+
for page in pdf.pages:
|
27 |
+
text += page.extract_text() or "" # Handle pages with no text
|
28 |
+
return text
|
29 |
+
except Exception as e:
|
30 |
+
print(f"Error reading PDF: {e}")
|
31 |
+
return ""
|
32 |
|
33 |
def parse_resume(pdf_file):
|
34 |
+
"""Parses the resume and extracts relevant information."""
|
35 |
# Extract text from PDF
|
36 |
resume_text = extract_text_from_pdf(pdf_file)
|
37 |
|
38 |
+
if not resume_text.strip():
|
39 |
+
print("No text found in PDF.")
|
40 |
+
return {}
|
41 |
|
42 |
# Use the NER model to identify entities in the resume
|
43 |
entities = nlp(resume_text)
|
44 |
|
|
|
|
|
|
|
|
|
45 |
# Initialize empty fields
|
46 |
name = email = phone = education = skills = experience = None
|
47 |
|
48 |
# Example parsing logic based on NER output
|
49 |
for entity in entities:
|
50 |
+
label = entity.get("entity", "")
|
51 |
+
word = entity.get("word", "").strip()
|
52 |
+
|
53 |
+
if label == "B-PER" or label == "I-PER":
|
54 |
+
name = (name or "") + word + " "
|
55 |
+
elif label == "B-ORG" or label == "I-ORG":
|
56 |
+
experience = (experience or "") + word + " "
|
57 |
+
elif "@" in word: # Simple email detection
|
58 |
+
email = word
|
59 |
+
elif label == "I-MISC":
|
60 |
+
skills = (skills or "") + word + ", "
|
61 |
+
|
62 |
+
# Clean up trailing spaces and commas
|
63 |
+
name = name.strip() if name else None
|
64 |
+
skills = skills.rstrip(", ") if skills else None
|
65 |
+
|
66 |
+
# Log the final parsed information
|
67 |
print(f"Parsed Info: Name={name}, Email={email}, Skills={skills}, Experience={experience}")
|
68 |
|
69 |
return {
|
|
|
76 |
}
|
77 |
|
78 |
def batch_process_resumes(pdf_files):
|
79 |
+
"""Processes a batch of resume PDFs and outputs an Excel file."""
|
80 |
all_resumes = []
|
81 |
for pdf_file in pdf_files:
|
82 |
resume_info = parse_resume(pdf_file)
|
83 |
|
84 |
# Only add the parsed resume info if there's meaningful data
|
85 |
+
if any(resume_info.values()):
|
86 |
all_resumes.append(resume_info)
|
87 |
|
88 |
+
# If no resumes are successfully parsed, return None
|
89 |
+
if not all_resumes:
|
90 |
+
print("No valid resume information was parsed.")
|
91 |
+
return None
|
92 |
+
|
93 |
# Convert to DataFrame
|
94 |
df = pd.DataFrame(all_resumes)
|
95 |
|
|
|
|
|
|
|
|
|
96 |
# Define the file path for the Excel file
|
97 |
output_file = "/tmp/parsed_resumes.xlsx"
|
98 |
|
|
|
126 |
# Attempt to log in with provided token
|
127 |
login_message = login_with_token(hf_token)
|
128 |
|
|
|
129 |
if "Error" not in login_message:
|
130 |
+
# Process resumes and generate the download link
|
131 |
excel_file_path = batch_process_resumes(pdf_files)
|
132 |
+
if excel_file_path:
|
133 |
+
return login_message + "\nExcel file with parsed resumes is ready for download.", excel_file_path
|
134 |
+
else:
|
135 |
+
return login_message + "\nNo valid resume information was parsed.", None
|
136 |
else:
|
137 |
return login_message, None
|
138 |
|