Manojajj committed on
Commit
67ba08f
·
verified ·
1 Parent(s): 28bc9a6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pdfplumber
3
+ import re
4
+ import openpyxl
5
+ from transformers import pipeline
6
+
7
+ # Function to extract text from PDF
8
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: Location of the PDF; handed directly to ``pdfplumber.open``.

    Returns:
        The extracted text of all pages in page order, with a newline
        between consecutive pages. A page that yields no text contributes
        an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # ``extract_text()`` can come back empty/None for a page with no
        # extractable text (e.g. a scanned image), so fall back to '' rather
        # than concatenating None.  Joining with '\n' keeps the last word of
        # one page from fusing with the first word of the next.
        return '\n'.join(page.extract_text() or '' for page in pdf.pages)
14
+
15
# Load the Llama model used for parsing (replace with an actual local model
# path if available).
model_name = "meta-llama/Llama-3.1-70B-Instruct"  # Replace with the model path or identifier
# Llama is a decoder-only (causal) language model, so it is driven through the
# "text-generation" task; "text2text-generation" targets encoder-decoder
# models.  return_full_text=False makes 'generated_text' contain only the
# model's answer instead of echoing the prompt (which embeds the whole resume).
nlp = pipeline("text-generation", model=model_name, return_full_text=False)
18
+
19
+ # Function to parse the resume text for name, email, phone, and skills
20
def parse_resume(text):
    """Ask the model for name, email, phone and skills found in a resume.

    One prompt per field is sent to the module-level ``nlp`` pipeline, and
    the first result's ``'generated_text'`` is used as the answer.

    Args:
        text: Plain text of the resume; appended to each prompt.

    Returns:
        A dict with the keys ``"name"``, ``"email"``, ``"phone"`` and
        ``"skills"``.  ``"name"`` and ``"skills"`` hold the raw generated
        text.  ``"email"`` and ``"phone"`` hold the first plausible match
        found in the generated text, or ``None`` when there is none.
    """
    # Prompt prefix for each field to extract.
    prompts = {
        "name": "Extract the name from this resume:\n",
        "email": "Extract the email address from this resume:\n",
        "phone": "Extract the phone number from this resume:\n",
        "skills": "Extract the technical skills from this resume:\n"
    }

    # Stricter than r'\S+@\S+': does not swallow surrounding punctuation such
    # as "<addr>," or a sentence-ending full stop.
    email_pattern = re.compile(
        r'[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+'
    )
    # Candidate phone numbers: digits optionally separated by spaces, dots,
    # dashes or parentheses, so "+1 (555) 123-4567" is accepted as well as
    # "5551234567".  The digit count is checked separately below.
    phone_pattern = re.compile(r'[+(]?\d[\d ().-]{8,}\d')

    results = {}

    for key, prompt in prompts.items():
        generated = nlp(prompt + text)[0]['generated_text']

        if key == 'email':
            found = email_pattern.search(generated)
            results[key] = found.group(0) if found else None
        elif key == 'phone':
            # Keep the first candidate with 10-15 digits, which is the same
            # length window the digits-only check enforced.
            results[key] = next(
                (
                    candidate
                    for candidate in phone_pattern.findall(generated)
                    if 10 <= sum(ch.isdigit() for ch in candidate) <= 15
                ),
                None,
            )
        else:
            # "name" and "skills" are stored exactly as generated.
            results[key] = generated

    return results
50
+
51
+ # Function to save parsed data to Excel file
52
def save_to_excel(parsed_data, output_file):
    """Write parsed resume records to a new Excel workbook.

    Args:
        parsed_data: Iterable of mappings, each providing the keys
            ``"name"``, ``"email"``, ``"phone"`` and ``"skills"``.
        output_file: Destination passed to ``Workbook.save``.

    The active sheet receives one header row followed by one row per record,
    in iteration order.
    """
    field_order = ("name", "email", "phone", "skills")

    workbook = openpyxl.Workbook()
    sheet = workbook.active

    # Header row first, then the records in the same column order.
    sheet.append(["Name", "Email", "Phone", "Skills"])
    for record in parsed_data:
        sheet.append([record[field] for field in field_order])

    workbook.save(output_file)
61
+
62
+ # Function to process PDF files and output an Excel file
63
def process_pdfs(pdfs):
    """Parse uploaded resume PDFs and write the results to an Excel file.

    Args:
        pdfs: The uploaded files.  Each item may be an object exposing the
            file path as ``.name`` or a plain path string.  ``None`` or an
            empty collection means nothing was uploaded.

    Returns:
        The path of the generated workbook (``"parsed_resumes.xlsx"``), or
        ``None`` when there was nothing to process.
    """
    # The interface can invoke this with no files selected (presumably as
    # None when the upload is cleared — confirm against the Gradio version in
    # use); return early instead of iterating over None.
    if not pdfs:
        return None

    parsed_data = []
    for pdf in pdfs:
        # Accept both file-like objects (path in ``.name``) and path strings.
        pdf_path = getattr(pdf, "name", pdf)
        text = extract_text_from_pdf(pdf_path)
        parsed_data.append(parse_resume(text))

    # Save the parsed data to an Excel file
    output_file = "parsed_resumes.xlsx"
    save_to_excel(parsed_data, output_file)

    return output_file
81
+
82
+ # Gradio interface setup
83
iface = gr.Interface(
    fn=process_pdfs,
    # ``type`` is deliberately left at the component default.
    # NOTE(review): "file" appears to be the default in Gradio 3.x and to be
    # rejected by Gradio 4+, whose default yields path strings that still
    # expose ``.name`` — confirm against the pinned Gradio version.
    inputs=gr.File(file_count="multiple"),
    outputs=gr.File(),
    # live=True re-runs process_pdfs whenever the input changes.
    live=True,
)

# Start serving the Gradio app.
iface.launch()