Update app.py
Browse files
app.py
CHANGED
@@ -1,87 +1,77 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
import torch
|
3 |
from transformers import pipeline
|
4 |
-
import pdfplumber
|
5 |
-
import re
|
6 |
import pandas as pd
|
|
|
7 |
|
8 |
-
#
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
def extract_text_from_pdf(pdf_file):
|
12 |
-
"""
|
13 |
-
with pdfplumber.open(pdf_file) as pdf:
|
14 |
-
text =
|
15 |
for page in pdf.pages:
|
16 |
text += page.extract_text()
|
17 |
return text
|
18 |
|
19 |
-
def parse_resume(
|
20 |
-
"""
|
21 |
-
#
|
22 |
-
|
23 |
-
email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
|
24 |
-
|
25 |
-
# Extract phone and email using regex
|
26 |
-
phone = re.findall(phone_pattern, resume_text)
|
27 |
-
email = re.findall(email_pattern, resume_text)
|
28 |
-
|
29 |
-
# Extract named entities (e.g., skills, education, and experience)
|
30 |
-
entities = nlp(resume_text)
|
31 |
|
32 |
-
#
|
33 |
-
|
34 |
-
experience = []
|
35 |
-
education = []
|
36 |
-
certifications = []
|
37 |
|
38 |
-
#
|
|
|
|
|
|
|
39 |
for entity in entities:
|
40 |
-
if '
|
41 |
-
|
42 |
-
elif '
|
43 |
-
|
44 |
-
elif '
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
"Certifications": ", ".join(certifications)
|
57 |
}
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
"""Process multiple resumes and output a single Excel file."""
|
63 |
-
all_parsed_data = []
|
64 |
-
|
65 |
-
# Loop through each uploaded PDF file and parse the data
|
66 |
for pdf_file in pdf_files:
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
output_file = "parsed_resumes.xlsx"
|
76 |
-
df.to_excel(output_file, index=False)
|
77 |
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
-
|
81 |
-
gr.Interface(
|
82 |
-
fn=process_resumes,
|
83 |
-
inputs=gr.File(file_count="multiple", label="Upload Resumes (PDFs)"),
|
84 |
-
outputs=gr.File(label="Download Parsed Data (Excel)"),
|
85 |
-
title="AI Resume Parser",
|
86 |
-
description="Upload multiple resumes (PDFs) to extract details like Name, Email, Phone, Skills, Experience, Education, and Certifications. The results will be saved in an Excel file."
|
87 |
-
).launch()
|
|
|
1 |
import gradio as gr
|
2 |
+
import pdfplumber
|
3 |
import torch
|
4 |
from transformers import pipeline
|
|
|
|
|
5 |
import pandas as pd
|
6 |
+
from huggingface_hub import login
|
7 |
|
8 |
+
# Standard-library import kept local to this setup section; it is only needed
# to read the optional Hub token from the environment.
import os

# Authenticate with the Hugging Face Hub only when a token is supplied through
# the HF_TOKEN environment variable (tokens are generated at
# https://huggingface.co/settings/tokens). A token is only needed for private
# models, and it must never be hard-coded in this file.
_hf_token = os.environ.get("HF_TOKEN")
if _hf_token:
    login(token=_hf_token)

# Load the model for Named Entity Recognition (NER).
# 'dbmdz/bert-large-cased-finetuned-conll03-english' can be replaced with any
# other token-classification model if needed.
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", framework="pt")
|
15 |
|
16 |
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF using pdfplumber.

    Args:
        pdf_file: Either a filesystem path (``str`` or path-like object) or an
            uploaded-file object that exposes its path as ``.name``.

    Returns:
        The text of all pages joined with newlines. A page for which
        pdfplumber yields no text contributes an empty string.
    """
    # Plain paths are used directly; anything else is treated as an upload
    # wrapper whose ``.name`` holds the path. Path-like objects are checked
    # first because their own ``.name`` is only the basename.
    if isinstance(pdf_file, str) or hasattr(pdf_file, '__fspath__'):
        path = pdf_file
    else:
        path = getattr(pdf_file, 'name', pdf_file)

    with pdfplumber.open(path) as pdf:
        # ``or ''`` guards against a falsy result (e.g. None for a page with
        # no text layer), which would otherwise break the concatenation.
        # Joining with a newline keeps the last word of one page from being
        # glued to the first word of the next.
        return '\n'.join(page.extract_text() or '' for page in pdf.pages)
|
23 |
|
24 |
+
def _group_entities(entities):
    """Merge token-level NER predictions into whole entity strings per label.

    Accepts both the raw token-classification output (items keyed by
    ``entity``, e.g. ``'B-PER'`` / ``'I-PER'``, with a token ``index``) and the
    aggregated output (items keyed by ``entity_group``). Returns a dict mapping
    the bare label (``'PER'``, ``'ORG'``, ...) to the list of entity strings in
    order of appearance.
    """
    grouped = {}
    prev_label = None
    prev_index = None
    for entity in entities:
        raw_label = (
            entity.get('entity_group')
            or entity.get('entity')
            or entity.get('label')
            or ''
        )
        # 'B-PER' -> prefix 'B', label 'PER'; a bare 'PER' has an empty prefix.
        prefix, _, label = raw_label.rpartition('-')
        word = entity.get('word') or ''
        is_subword = word.startswith('##')
        piece = word[2:] if is_subword else word
        index = entity.get('index')

        if not label or label == 'O' or not piece:
            prev_label = prev_index = None
            continue

        spans = grouped.setdefault(label, [])
        # A token continues the previous entity only when it is the very next
        # token, carries the same label and is not explicitly marked as a new
        # beginning. Aggregated items have no ``index`` and are never merged.
        continues = (
            bool(spans)
            and label == prev_label
            and prefix != 'B'
            and index is not None
            and prev_index is not None
            and index == prev_index + 1
        )
        if continues:
            # Word pieces ('##son') attach directly; whole words get a space.
            spans[-1] += piece if is_subword else ' ' + piece
        else:
            spans.append(piece)
        prev_label, prev_index = label, index
    return grouped


def parse_resume(pdf_file):
    """Parse one resume PDF and extract contact details and named entities.

    Email and phone are found with regular expressions, because a CoNLL-03
    style NER model has no label for either. Name, Experience and Skills come
    from the model's PER, ORG and MISC entities respectively. Education has no
    matching label and is left as ``None``.

    Args:
        pdf_file: The uploaded PDF, passed through to ``extract_text_from_pdf``.

    Returns:
        A dict with the keys ``'Name'``, ``'Email'``, ``'Phone'``,
        ``'Education'``, ``'Skills'`` and ``'Experience'``; each value is a
        string, or ``None`` when nothing was found.
    """
    import re  # local import: the file's import header does not bring in `re`

    # Extract text from PDF
    resume_text = extract_text_from_pdf(pdf_file) or ''

    # Initialize empty fields
    name = email = phone = education = skills = experience = None

    email_match = re.search(
        r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', resume_text
    )
    if email_match:
        email = email_match.group(0)

    # Heuristic: a run of digits and common separators on one line that holds
    # 9-15 digits. The digit-count filter rejects year ranges like
    # "2015 - 2019" (8 digits).
    for candidate in re.finditer(r'\+?\(?\d[\d \t().-]{7,}\d', resume_text):
        digit_count = sum(ch.isdigit() for ch in candidate.group(0))
        if 9 <= digit_count <= 15:
            phone = candidate.group(0).strip()
            break

    # Use the NER model to identify entities in the resume; skip the call for
    # an empty document.
    # NOTE(review): very long resumes may exceed the model's input length —
    # confirm whether chunking is needed.
    entities = nlp(resume_text) if resume_text.strip() else []
    grouped = _group_entities(entities)

    people = grouped.get('PER')
    if people:
        name = people[0]  # the first detected person name
    organisations = grouped.get('ORG')
    if organisations:
        # dict.fromkeys de-duplicates while preserving first-seen order.
        experience = ', '.join(dict.fromkeys(organisations))
    misc = grouped.get('MISC')
    if misc:
        skills = ', '.join(dict.fromkeys(misc))

    return {
        'Name': name,
        'Email': email,
        'Phone': phone,
        'Education': education,
        'Skills': skills,
        'Experience': experience,
    }
|
54 |
|
55 |
+
def batch_process_resumes(pdf_files):
    """Parse a batch of resume PDFs and save the results to an Excel file.

    Args:
        pdf_files: The uploaded PDF files; may be ``None`` or empty when the
            button is pressed without an upload.

    Returns:
        A status message. When at least one file is given, every file is
        parsed with ``parse_resume`` and the rows are written to
        ``parsed_resumes.xlsx`` in the working directory.
    """
    # Nothing uploaded: report it instead of failing on iteration over None
    # or writing an empty spreadsheet.
    if not pdf_files:
        return "No resumes were uploaded."

    all_resumes = [parse_resume(pdf_file) for pdf_file in pdf_files]

    # Convert to DataFrame
    df = pd.DataFrame(all_resumes)
    # Save to Excel
    df.to_excel("parsed_resumes.xlsx", index=False)
    return "Excel file with parsed resumes has been saved as 'parsed_resumes.xlsx'."
|
|
|
|
|
67 |
|
68 |
+
# Gradio UI: upload one or more PDFs, press the button, and read the status
# message returned by batch_process_resumes.
with gr.Blocks() as demo:
    gr.Markdown("### AI Resume Parser")
    resume_upload = gr.File(file_count="multiple", label="Upload Resumes (PDFs)")
    status_box = gr.Textbox(label="Result")
    run_button = gr.Button("Process Resumes")

    # Wire the button to the batch parser: uploaded files in, status text out.
    run_button.click(
        fn=batch_process_resumes,
        inputs=resume_upload,
        outputs=status_box,
    )

demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|