# utils.py -- created by robertselvam ("Create utils.py", commit d89a462, verified).
import os
import re
import pandas as pd
from PyPDF2 import PdfReader
from huggingface_hub import login
from langchain.docstore.document import Document
from langchain_openai import AzureOpenAIEmbeddings
from openai import AzureOpenAI
# Module-level Azure OpenAI client shared by the chat-completion helpers in
# this file (opeani_response and get_summary).
# NOTE(review): constructed with no arguments, so the endpoint, API key and
# API version presumably come from environment variables -- confirm against
# the deployment configuration.
client = AzureOpenAI()
def get_pdf_text(file_path):
    """Return the concatenated text of every page of a PDF.

    Args:
        file_path: Whatever ``PdfReader`` accepts as its source. Within this
            file, ``create_docs`` passes uploaded-file objects.

    Returns:
        str: The extracted text of all pages, joined in page order with no
        separator. A page that yields no text contributes an empty string.
    """
    pdf = PdfReader(file_path)
    # Iterate the pages directly and join once instead of growing a string
    # with ``+=``. The ``or ""`` is a defensive guard so a page whose
    # extraction yields nothing cannot break the join.
    return "".join(page.extract_text() or "" for page in pdf.pages)
# Iterate over the PDF files that the user uploaded, one by one
def create_docs(user_pdf_list, unique_id):
    """Build one ``Document`` per uploaded PDF.

    Args:
        user_pdf_list: Iterable of uploaded-file objects. Each object is
            passed to ``get_pdf_text`` and must expose ``name``, ``file_id``,
            ``type`` and ``size`` attributes.
        unique_id: Value stored under the ``"unique_id"`` metadata key of
            every document.

    Returns:
        list: ``Document`` objects in input order. ``page_content`` holds the
        full extracted text of the PDF (it is not split into chunks here).
    """
    docs = []
    for uploaded_file in user_pdf_list:
        full_text = get_pdf_text(uploaded_file)
        file_type = uploaded_file.type
        docs.append(Document(
            page_content=full_text,
            metadata={
                "name": uploaded_file.name,
                "id": uploaded_file.file_id,
                "type": file_type,
                # Legacy misspelled key, kept so that any existing reader of
                # the metadata continues to work.
                "type=": file_type,
                "size": uploaded_file.size,
                "unique_id": unique_id,
            },
        ))
    return docs
def create_embeddings_load_data():
    """Create the embeddings object for the ``AZUREEMBEDDING`` deployment.

    Returns:
        A new ``AzureOpenAIEmbeddings`` instance configured with
        ``azure_deployment="AZUREEMBEDDING"``.
    """
    return AzureOpenAIEmbeddings(azure_deployment="AZUREEMBEDDING")
def opeani_response(resume, job_description):
    """Ask the chat model to assess a resume against a job description.

    Args:
        resume: Resume text interpolated into the prompt.
        job_description: Job description text interpolated into the prompt.

    Returns:
        str: The content of the first completion choice, which the prompt
        asks to follow the "Matched Percentage / Reason / Skills To Improve /
        Irrelevant / Keywords" layout. An empty string is returned when the
        response carries no text content.
    """
    user_prompt = f"""
Given the job description and the resume, assess the given job description and the resume with detailed analysis. provide matching percentage.
**Job Description:**{job_description}
**Resume:**{resume}
**Detailed Analysis:**
**the result should be in this format:**
'''Matched Percentage: [matching percentage].
Reason: [Reasons for why this resume matched and not matched.].
Skills To Improve: [Mention the skills to improve for the candidate according to the given job description.].
Irrelevant: [mention the irrelevant skills and experience].
Keywords: [Return the matched keywords from resume and job description.]'''
"""
    chat_completion = client.chat.completions.create(
        model="GPT35",
        messages=[
            {"role": "system", "content": "You are a Detailed Resume Matcher For Given Job description."},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=500,
        temperature=0,
    )
    # The message content is optional in the SDK's response model; normalise
    # a missing value to "" so callers that split the text do not crash.
    generated_text = chat_completion.choices[0].message.content or ""
    print(generated_text)
    return generated_text
# Section headers expected in the model's answer, mapped to the bucket that
# collects the text belonging to each of them.
_SECTION_HEADERS = {
    'Reason:': 'reason',
    'Skills To Improve:': 'skills_to_improve',
    'Keywords:': 'keywords',
    'Irrelevant:': 'irrelevant',
}
# Preferred form: a number (optionally with decimals) followed by a percent sign.
_PERCENT_WITH_SIGN = re.compile(r"(\d+)(?:\.\d+)?\s*%")
# Fallback: the first integer found after the label.
_FIRST_INTEGER = re.compile(r"(\d+)")


def get_strip_response(matched_result):
    """Split the model's matching report into its individual fields.

    The text is scanned line by line. A line starting with
    ``Matched Percentage:`` supplies the score; a line starting with one of
    ``Reason:``, ``Skills To Improve:``, ``Keywords:`` or ``Irrelevant:``
    opens that section, and every following non-header line is appended to
    the section that is currently open. Lines seen before any section header
    are ignored.

    Args:
        matched_result: The report text. ``None`` or an empty string is
            treated as an empty report.

    Returns:
        tuple: ``(matched_percentage, reason, skills_to_improve, keywords,
        irrelevant)``. ``matched_percentage`` is an ``int`` (decimals are
        truncated) or ``None`` when no number is found; the other four are
        strings with the section's lines joined by single spaces and leading
        or trailing hyphens/spaces removed.
    """
    collected = {bucket: [] for bucket in _SECTION_HEADERS.values()}
    matched_percentage = None
    current = None

    for raw_line in (matched_result or '').split('\n'):
        line = raw_line.strip()
        if not line:
            # Blank lines would otherwise turn into double spaces in the output.
            continue

        if line.startswith('Matched Percentage:'):
            value = line.split(':', 1)[1]
            # Accept "75%", "75.5%", "[75%]" and a bare "75".
            match = _PERCENT_WITH_SIGN.search(value) or _FIRST_INTEGER.search(value)
            if match:
                matched_percentage = int(match.group(1))
            continue

        header = next((h for h in _SECTION_HEADERS if line.startswith(h)), None)
        if header is not None:
            current = _SECTION_HEADERS[header]
            collected[current].append(line.split(':', 1)[1].strip())
        elif current is not None:
            collected[current].append(line)

    # Join each section's lines into one string and trim list-style dashes.
    reason = ' '.join(collected['reason']).strip('- ')
    skills_to_improve = ' '.join(collected['skills_to_improve']).strip('- ')
    keywords = ' '.join(collected['keywords']).strip('- ')
    irrelevant = ' '.join(collected['irrelevant']).strip('- ')
    return matched_percentage, reason, skills_to_improve, keywords, irrelevant
# Helps us get the summary of a document
def get_summary(resume):
    """Ask the chat model for a short summary of a resume.

    Args:
        resume: Resume text interpolated into the prompt.

    Returns:
        The content of the first completion choice.
    """
    request_messages = [
        {"role": "system", "content": "You are a Resume summarizer."},
        {"role": "user", "content": f"""Summarize the given resume within 60 words. resume : {resume}
"""},
    ]
    response = client.chat.completions.create(
        model="GPT35",
        messages=request_messages,
        max_tokens=200,
        temperature=0,
    )
    return response.choices[0].message.content