Manojajj commited on
Commit
4eec766
·
verified ·
1 Parent(s): e6cc67d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
+ import pdfplumber
5
+ import re
6
+ import pandas as pd
7
+
8
# Load a pre-trained token-classification (NER) pipeline.
# NOTE(review): the original checkpoint "huggingface/bert-base-cased" does not
# exist on the Hub, and plain bert-base-cased is not fine-tuned for NER, so the
# pipeline load would fail (or emit meaningless labels). "dslim/bert-base-NER"
# is a standard NER checkpoint whose label set (B-/I- PER, ORG, LOC, MISC)
# matches the 'ORG'/'MISC'/'LOC' substring checks in parse_resume below.
nlp = pipeline("ner", model="dslim/bert-base-NER", framework="pt")
10
+
11
def extract_text_from_pdf(pdf_file):
    """Extract all page text from an uploaded PDF resume.

    Args:
        pdf_file: A path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        str: The concatenated text of every page. Pages without an
        extractable text layer contribute the empty string.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # page.extract_text() returns None for pages with no text layer
        # (e.g. scanned images); the original ``text += None`` would raise
        # TypeError — fall back to "" instead. str.join also avoids the
        # quadratic += accumulation.
        return "".join(page.extract_text() or "" for page in pdf.pages)
18
+
19
def parse_resume(resume_text):
    """Parse resume text into contact details and NER-derived categories.

    Args:
        resume_text: Raw text extracted from a resume.

    Returns:
        dict: Keys "Phone", "Email", "Skills", "Experience", "Education",
        and "Certifications". Contact fields fall back to "Not found";
        the category fields are comma-joined entity words.
    """
    # Contact details via regex; keep only the first match of each.
    phones = re.findall(r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}', resume_text)
    emails = re.findall(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', resume_text)

    # Bucket recognized entities by a substring of their NER tag.
    # NOTE(review): anything that is not ORG/MISC/LOC (e.g. person names)
    # lands in "certifications" — this mirrors the original heuristic.
    buckets = {"skills": [], "experience": [], "education": [], "certifications": []}
    for ent in nlp(resume_text):
        tag = ent['entity']
        if 'ORG' in tag:
            buckets["experience"].append(ent['word'])
        elif 'MISC' in tag:
            buckets["skills"].append(ent['word'])
        elif 'LOC' in tag:
            buckets["education"].append(ent['word'])
        else:
            buckets["certifications"].append(ent['word'])

    return {
        "Phone": phones[0] if phones else "Not found",
        "Email": emails[0] if emails else "Not found",
        "Skills": ", ".join(buckets["skills"]),
        "Experience": ", ".join(buckets["experience"]),
        "Education": ", ".join(buckets["education"]),
        "Certifications": ", ".join(buckets["certifications"]),
    }
60
+
61
def process_resumes(pdf_files):
    """Parse a batch of PDF resumes and save the results to one Excel file.

    Args:
        pdf_files: Iterable of uploaded PDF files (as supplied by the
            Gradio File component), or None when nothing was uploaded.

    Returns:
        str: Path of the generated Excel file ("parsed_resumes.xlsx").
    """
    # Gradio passes None when no files are selected; treat that as an empty
    # batch instead of raising TypeError on iteration.
    all_parsed_data = [
        parse_resume(extract_text_from_pdf(pdf_file))
        for pdf_file in (pdf_files or [])
    ]

    # One row per resume; columns come from the parsed-data dict keys.
    df = pd.DataFrame(all_parsed_data)

    # NOTE(review): a fixed filename in the CWD is not safe under concurrent
    # requests — consider a tempfile per request.
    output_file = "parsed_resumes.xlsx"
    df.to_excel(output_file, index=False)

    return output_file
79
+
80
# Build the Gradio app: multiple PDF resumes in, one Excel workbook out.
demo = gr.Interface(
    fn=process_resumes,
    inputs=gr.File(file_count="multiple", label="Upload Resumes (PDFs)"),
    outputs=gr.File(label="Download Parsed Data (Excel)"),
    title="AI Resume Parser",
    description="Upload multiple resumes (PDFs) to extract details like Name, Email, Phone, Skills, Experience, Education, and Certifications. The results will be saved in an Excel file.",
)
demo.launch()