Manojajj committed on
Commit
dd6777e
·
verified ·
1 Parent(s): b737c1e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
+ import pandas as pd
5
+ import re
6
+
7
# Module-level Named Entity Recognition (NER) pipeline. It is constructed once
# when the module loads so every call to parse_resume reuses the same model.
nlp = pipeline(
    task="ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    framework="pt",
)
9
+
10
def parse_resume(resume_text):
    """Extract the phone number, email address, and skills from one resume.

    Args:
        resume_text: Raw resume text. Anything that is not a non-blank string
            (``None``, the float ``NaN`` pandas yields for an empty CSV cell,
            ``""``) is treated as an empty resume instead of raising.

    Returns:
        A dict with the keys ``"Phone"``, ``"Email"`` and ``"Skills"``.
        ``"Phone"`` and ``"Email"`` hold the first regex match or
        ``"Not found"``. ``"Skills"`` is a comma-separated string of the words
        the NER model tagged as MISC, or ``"No skills found"``.
    """
    # Guard first: re and the NER pipeline both require a real string, and a
    # blank resume cannot yield any result anyway.
    if not isinstance(resume_text, str) or not resume_text.strip():
        return {
            "Phone": "Not found",
            "Email": "Not found",
            "Skills": "No skills found",
        }

    # Patterns for US-style 10-digit phone numbers and for email addresses.
    phone_pattern = r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'

    # Only the first occurrence of each is reported.
    phone_match = re.search(phone_pattern, resume_text)
    email_match = re.search(email_pattern, resume_text)

    # The pipeline returns one dict per token. A word split by the tokenizer
    # shows up as several entries whose continuation pieces start with "##",
    # so glue those back onto the preceding MISC token instead of listing the
    # fragments as separate skills.
    skills = []
    prev_index = None
    for entity in nlp(resume_text):
        if 'MISC' not in entity['entity']:
            continue
        word = entity['word']
        index = entity.get('index')
        is_continuation = (
            word.startswith('##')
            and skills
            and prev_index is not None
            and index == prev_index + 1
        )
        if is_continuation:
            skills[-1] += word[2:]
        else:
            skills.append(word)
        prev_index = index

    return {
        "Phone": phone_match.group(0) if phone_match else "Not found",
        "Email": email_match.group(0) if email_match else "Not found",
        "Skills": ", ".join(skills) if skills else "No skills found",
    }
35
+
36
def process_resumes(csv_file):
    """Parse every resume in an uploaded CSV and write the results to Excel.

    Args:
        csv_file: The upload handed over by the ``gr.File`` input. Depending
            on the Gradio version/configuration this is either a path string
            or an object exposing the path as ``.name``; both are accepted.

    Returns:
        The path of the generated ``parsed_resumes.xlsx`` workbook, which
        holds one row of parsed fields per row of the CSV.

    Raises:
        gr.Error: If no file was uploaded or the CSV has no ``Resume`` column.
            Raising (rather than returning the message) is required because
            the output component is a file: a returned string would be
            interpreted as a file path.
    """
    if csv_file is None:
        raise gr.Error("Error: Please upload a CSV file.")

    # Accept both a tempfile-like wrapper (``.name``) and a plain path string.
    csv_path = getattr(csv_file, "name", csv_file)
    df = pd.read_csv(csv_path)

    # The resume text is expected in a column named 'Resume'.
    if 'Resume' not in df.columns:
        raise gr.Error("Error: The CSV file must contain a 'Resume' column.")

    # One parsed dict per CSV row, in row order.
    all_parsed_data = [parse_resume(resume_text) for resume_text in df['Resume']]
    parsed_df = pd.DataFrame(all_parsed_data)

    # Save the results; Gradio serves the returned path as the download.
    output_file = "parsed_resumes.xlsx"
    parsed_df.to_excel(output_file, index=False)

    return output_file
61
+
62
# Build the web UI: a single CSV upload goes in, a single Excel download comes
# out. The description lists only the fields the parser actually produces.
demo = gr.Interface(
    fn=process_resumes,
    inputs=gr.File(file_count="single", label="Upload Resume CSV"),
    outputs=gr.File(label="Download Parsed Data (Excel)"),
    title="AI Resume Parser",
    description=(
        "Upload a CSV file with a 'Resume' column containing resume texts to "
        "extract details like Email, Phone, and Skills. The results will be "
        "saved in an Excel file."
    ),
)

# Start the app as soon as the script runs.
demo.launch()