Spaces:
Runtime error
Runtime error
robertselvam
commited on
Create utils.py
Browse files
utils.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import pandas as pd
|
4 |
+
from PyPDF2 import PdfReader
|
5 |
+
from huggingface_hub import login
|
6 |
+
from langchain.docstore.document import Document
|
7 |
+
from langchain_openai import AzureOpenAIEmbeddings
|
8 |
+
from openai import AzureOpenAI
|
9 |
+
|
10 |
+
# Module-level Azure OpenAI client used by the chat-completion helpers below.
# NOTE(review): built with no arguments, so the endpoint, key and API version
# are presumably taken from environment variables — confirm deployment config.
client = AzureOpenAI()
|
11 |
+
|
12 |
+
def get_pdf_text(file_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        file_path: The PDF source; it is handed to ``PdfReader`` unchanged.

    Returns:
        A single string with the extracted text of all pages in page order.
        Nothing is inserted between pages; a PDF with no pages yields ``''``.
    """
    pdf = PdfReader(file_path)
    # Walk the pages directly and join once, rather than indexing by position
    # and growing the string with ``+=`` for every page.
    return ''.join(page.extract_text() for page in pdf.pages)
|
22 |
+
|
23 |
+
# Iterate over the PDF files that the user uploaded, one by one
|
24 |
+
def create_docs(user_pdf_list, unique_id):
    """Build one ``Document`` per uploaded PDF.

    The content of each document is the text ``get_pdf_text`` returns for the
    upload. Its metadata copies the upload's ``name``, ``file_id``, ``type``
    and ``size`` attributes and adds the supplied ``unique_id``.

    Args:
        user_pdf_list: Iterable of uploaded-file objects exposing the
            attributes listed above.
        unique_id: Value stored under the ``"unique_id"`` metadata key of
            every document.

    Returns:
        A list of ``Document`` objects, in the same order as the uploads.
    """
    # NOTE(review): the "type=" key (with the trailing "=") looks like a typo
    # for "type"; it is left untouched because consumers may read it — confirm.
    return [
        Document(
            page_content=get_pdf_text(uploaded_pdf),
            metadata={
                "name": uploaded_pdf.name,
                "id": uploaded_pdf.file_id,
                "type=": uploaded_pdf.type,
                "size": uploaded_pdf.size,
                "unique_id": unique_id,
            },
        )
        for uploaded_pdf in user_pdf_list
    ]
|
37 |
+
|
38 |
+
|
39 |
+
def create_embeddings_load_data():
    """Return an ``AzureOpenAIEmbeddings`` instance for the "AZUREEMBEDDING" deployment."""
    return AzureOpenAIEmbeddings(azure_deployment="AZUREEMBEDDING")
|
44 |
+
|
45 |
+
|
46 |
+
def opeani_response(resume, job_description):
    """Ask the chat model to assess a resume against a job description.

    Args:
        resume: Resume text; interpolated into the user prompt.
        job_description: Job description text; interpolated into the user
            prompt.

    Returns:
        The message content of the first completion choice. The prompt asks
        the model to answer with the labelled fields ``Matched Percentage``,
        ``Reason``, ``Skills To Improve``, ``Irrelevant`` and ``Keywords``.
        The reply is also printed to stdout.
    """
    # The prompt lines are flush-left on purpose: they are part of the string
    # literal, so any indentation here would be sent to the model.
    user_prompt = f"""
Given the job description and the resume, assess the given job description and the resume with detailed analysis. provide matching percentage.
**Job Description:**{job_description}
**Resume:**{resume}

**Detailed Analysis:**
**the result should be in this format:**
'''Matched Percentage: [matching percentage].
Reason: [Reasons for why this resume matched and not matched.].
Skills To Improve: [Mention the skills to improve for the candidate according to the given job description.].
Irrelevant: [mention the irrelevant skills and experience].
Keywords: [Return the matched keywords from resume and job description.]'''
"""
    chat_completion = client.chat.completions.create(
        # NOTE(review): "GPT35" is presumably the Azure deployment name — confirm.
        model="GPT35",
        messages=[
            {"role": "system", "content": "You are a Detailed Resume Matcher For Given Job description."},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=500,
        temperature=0,
    )
    generated_text = chat_completion.choices[0].message.content
    print(generated_text)
    return generated_text
|
74 |
+
|
75 |
+
def get_strip_response(matched_result):
    """Split the model's labelled reply into its individual fields.

    The reply is read line by line. A line starting with one of the headers
    ``Reason:``, ``Skills To Improve:``, ``Keywords:`` or ``Irrelevant:``
    opens that section; following lines without a header are treated as a
    continuation of the currently open section. Lines seen before any section
    header are ignored.

    Args:
        matched_result: The reply text. ``None`` or an empty string produces
            the empty result instead of raising.

    Returns:
        A tuple ``(matched_percentage, reason, skills_to_improve, keywords,
        irrelevant)``. ``matched_percentage`` is an ``int`` or ``None`` when
        no number follows the ``Matched Percentage:`` header. The other four
        are strings: the section's lines joined with single spaces, with
        leading/trailing ``-`` and space characters removed.
    """
    section_headers = (
        ('Reason:', 'reason'),
        ('Skills To Improve:', 'skills_to_improve'),
        ('Keywords:', 'keywords'),
        ('Irrelevant:', 'irrelevant'),
    )
    sections = {name: [] for _, name in section_headers}
    matched_percentage = None
    current = None

    # ``or ''`` lets a missing reply (None) fall through to the empty result.
    for raw_line in (matched_result or '').split('\n'):
        line = raw_line.strip()
        if not line:
            # Blank lines carry no content; skipping them avoids double
            # spaces in the joined section text.
            continue

        if line.startswith('Matched Percentage:'):
            # Accept "75%", "75", "[75%]" and "75.5%": the "%" sign is
            # optional and an opening bracket (copied from the prompt's
            # template) is tolerated. Only the integer part is kept.
            match = re.search(r"Matched Percentage:\s*\[?\s*(\d+)", line)
            if match:
                matched_percentage = int(match.group(1))
            continue

        for header, name in section_headers:
            if line.startswith(header):
                current = name
                # Keep only the text after the header's colon.
                line = line.split(':', 1)[1].strip()
                break

        if current is not None and line:
            sections[current].append(line)

    reason, skills_to_improve, keywords, irrelevant = (
        ' '.join(sections[name]).strip('- ')
        for name in ('reason', 'skills_to_improve', 'keywords', 'irrelevant')
    )
    return matched_percentage, reason, skills_to_improve, keywords, irrelevant
|
120 |
+
|
121 |
+
|
122 |
+
# Helps us get the summary of a document
|
123 |
+
def get_summary(resume):
    """Ask the chat model for a short summary of a resume.

    Args:
        resume: Resume text; interpolated into the user prompt, which asks
            for a summary within 60 words.

    Returns:
        The message content of the first completion choice.
    """
    # The closing quotes sit on their own flush-left line because the prompt
    # ends with a newline that is part of the string literal.
    user_prompt = f"""Summarize the given resume within 60 words. resume : {resume}
"""
    conversation = [
        {"role": "system", "content": "You are a Resume summarizer."},
        {"role": "user", "content": user_prompt},
    ]
    chat_completion = client.chat.completions.create(
        model="GPT35",
        messages=conversation,
        max_tokens=200,
        temperature=0,
    )
    return chat_completion.choices[0].message.content
|