Manojajj committed on
Commit
18b1dee
·
verified ·
1 Parent(s): b5578a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -35
app.py CHANGED
@@ -8,7 +8,7 @@ import os
8
 
9
  # Function to login using Hugging Face API token
10
  def login_with_token(hf_token):
11
- """Login to Hugging Face using provided token"""
12
  try:
13
  login(token=hf_token)
14
  return "Logged in successfully!"
@@ -19,46 +19,51 @@ def login_with_token(hf_token):
19
  nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", framework="pt")
20
 
21
  def extract_text_from_pdf(pdf_file):
22
- """Extracts text from a PDF file using pdfplumber"""
23
- with pdfplumber.open(pdf_file.name) as pdf:
24
- text = ''
25
- for page in pdf.pages:
26
- text += page.extract_text()
27
- return text
 
 
 
 
28
 
29
  def parse_resume(pdf_file):
30
- """Parses the resume and extracts relevant information"""
31
  # Extract text from PDF
32
  resume_text = extract_text_from_pdf(pdf_file)
33
 
34
- # Log the extracted text for debugging
35
- print("Extracted Text from Resume:")
36
- print(resume_text[:500]) # Print the first 500 characters for preview
37
 
38
  # Use the NER model to identify entities in the resume
39
  entities = nlp(resume_text)
40
 
41
- # Log the NER output for debugging
42
- print("NER Output:")
43
- print(entities)
44
-
45
  # Initialize empty fields
46
  name = email = phone = education = skills = experience = None
47
 
48
  # Example parsing logic based on NER output
49
  for entity in entities:
50
- # Check if 'label' key exists in the entity to avoid KeyError
51
- if 'label' in entity:
52
- if entity['label'] == 'PER':
53
- name = entity['word'] # If detected, use the first person name
54
- elif entity['label'] == 'ORG':
55
- experience = entity['word'] # Could be an organization name (e.g., employer)
56
- elif entity['label'] == 'EMAIL':
57
- email = entity['word']
58
- elif entity['label'] == 'MISC':
59
- skills = entity['word'] # Example for skills or qualifications
60
-
61
- # Log the final parsed information for debugging
 
 
 
 
 
62
  print(f"Parsed Info: Name={name}, Email={email}, Skills={skills}, Experience={experience}")
63
 
64
  return {
@@ -71,22 +76,23 @@ def parse_resume(pdf_file):
71
  }
72
 
73
  def batch_process_resumes(pdf_files):
74
- """Process a batch of resume PDFs and output in a DataFrame"""
75
  all_resumes = []
76
  for pdf_file in pdf_files:
77
  resume_info = parse_resume(pdf_file)
78
 
79
  # Only add the parsed resume info if there's meaningful data
80
- if any(resume_info.values()): # Skip empty resume entries
81
  all_resumes.append(resume_info)
82
 
 
 
 
 
 
83
  # Convert to DataFrame
84
  df = pd.DataFrame(all_resumes)
85
 
86
- # If the DataFrame is empty, return a message indicating no data was found
87
- if df.empty:
88
- return "No valid resume information was parsed."
89
-
90
  # Define the file path for the Excel file
91
  output_file = "/tmp/parsed_resumes.xlsx"
92
 
@@ -120,10 +126,13 @@ with gr.Blocks() as demo:
120
  # Attempt to log in with provided token
121
  login_message = login_with_token(hf_token)
122
 
123
- # If login is successful, process resumes and generate the download link
124
  if "Error" not in login_message:
 
125
  excel_file_path = batch_process_resumes(pdf_files)
126
- return login_message + "\nExcel file with parsed resumes is ready for download.", excel_file_path
 
 
 
127
  else:
128
  return login_message, None
129
 
 
8
 
9
  # Function to login using Hugging Face API token
10
  def login_with_token(hf_token):
11
+ """Login to Hugging Face using provided token."""
12
  try:
13
  login(token=hf_token)
14
  return "Logged in successfully!"
 
19
  nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", framework="pt")
20
 
21
  def extract_text_from_pdf(pdf_file):
22
+ """Extracts text from a PDF file using pdfplumber."""
23
+ try:
24
+ with pdfplumber.open(pdf_file.name) as pdf:
25
+ text = ''
26
+ for page in pdf.pages:
27
+ text += page.extract_text() or "" # Handle pages with no text
28
+ return text
29
+ except Exception as e:
30
+ print(f"Error reading PDF: {e}")
31
+ return ""
32
 
33
  def parse_resume(pdf_file):
34
+ """Parses the resume and extracts relevant information."""
35
  # Extract text from PDF
36
  resume_text = extract_text_from_pdf(pdf_file)
37
 
38
+ if not resume_text.strip():
39
+ print("No text found in PDF.")
40
+ return {}
41
 
42
  # Use the NER model to identify entities in the resume
43
  entities = nlp(resume_text)
44
 
 
 
 
 
45
  # Initialize empty fields
46
  name = email = phone = education = skills = experience = None
47
 
48
  # Example parsing logic based on NER output
49
  for entity in entities:
50
+ label = entity.get("entity", "")
51
+ word = entity.get("word", "").strip()
52
+
53
+ if label == "B-PER" or label == "I-PER":
54
+ name = (name or "") + word + " "
55
+ elif label == "B-ORG" or label == "I-ORG":
56
+ experience = (experience or "") + word + " "
57
+ elif "@" in word: # Simple email detection
58
+ email = word
59
+ elif label == "I-MISC":
60
+ skills = (skills or "") + word + ", "
61
+
62
+ # Clean up trailing spaces and commas
63
+ name = name.strip() if name else None
64
+ skills = skills.rstrip(", ") if skills else None
65
+
66
+ # Log the final parsed information
67
  print(f"Parsed Info: Name={name}, Email={email}, Skills={skills}, Experience={experience}")
68
 
69
  return {
 
76
  }
77
 
78
  def batch_process_resumes(pdf_files):
79
+ """Processes a batch of resume PDFs and outputs an Excel file."""
80
  all_resumes = []
81
  for pdf_file in pdf_files:
82
  resume_info = parse_resume(pdf_file)
83
 
84
  # Only add the parsed resume info if there's meaningful data
85
+ if any(resume_info.values()):
86
  all_resumes.append(resume_info)
87
 
88
+ # If no resumes are successfully parsed, return None
89
+ if not all_resumes:
90
+ print("No valid resume information was parsed.")
91
+ return None
92
+
93
  # Convert to DataFrame
94
  df = pd.DataFrame(all_resumes)
95
 
 
 
 
 
96
  # Define the file path for the Excel file
97
  output_file = "/tmp/parsed_resumes.xlsx"
98
 
 
126
  # Attempt to log in with provided token
127
  login_message = login_with_token(hf_token)
128
 
 
129
  if "Error" not in login_message:
130
+ # Process resumes and generate the download link
131
  excel_file_path = batch_process_resumes(pdf_files)
132
+ if excel_file_path:
133
+ return login_message + "\nExcel file with parsed resumes is ready for download.", excel_file_path
134
+ else:
135
+ return login_message + "\nNo valid resume information was parsed.", None
136
  else:
137
  return login_message, None
138