Spaces:
Sleeping
Sleeping
File size: 6,836 Bytes
416760a e21cf92 416760a e21cf92 416760a e21cf92 416760a e21cf92 416760a e21cf92 416760a e21cf92 416760a e21cf92 416760a e21cf92 eb7a184 e21cf92 416760a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
from PyPDF2 import PdfReader
from agents.agents import get_agent_groq
import json
import re
import time
from agents import prompts
def parse_resume(path):
    """Extract the full text of a single PDF resume.

    Args:
        path: A filesystem path or file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        str: The concatenated text of every page, in page order.

    Note:
        ``Page.extract_text()`` returns ``None`` for pages with no
        extractable text (e.g. scanned images); the ``or ''`` guard keeps
        such pages from raising ``TypeError`` on concatenation.
    """
    reader = PdfReader(path)
    print(len(reader.pages))  # debug: page count
    return ''.join((page.extract_text() or '') for page in reader.pages)
def parse_resumes(resumes_list):
    """Extract text from a batch of PDF resumes.

    Args:
        resumes_list: Iterable of paths or file-like objects readable by
            ``PyPDF2.PdfReader``.

    Returns:
        list[str]: One extracted-text string per input resume, in order.
    """
    resumes_text = []
    for resume in resumes_list:
        reader = PdfReader(resume)
        # extract_text() may return None for image-only pages; the
        # ``or ''`` guard avoids a TypeError when joining.
        text = ''.join((page.extract_text() or '') for page in reader.pages)
        resumes_text.append(text)
    return resumes_text
def parse_(resumes_list):
    """Extract text from each resume by delegating to :func:`parse_resume`.

    Args:
        resumes_list: Iterable of PDF paths/file-like objects.

    Returns:
        list[str]: Extracted text per resume, preserving input order.
    """
    return [parse_resume(item) for item in resumes_list]
from typing_extensions import Annotated, TypedDict, Optional
# Define TypedDict for structured output
class ResumeAnalysis(TypedDict):
    """Structured scoring schema the LLM must fill in for one candidate.

    The four component scores (40 + 30 + 20 + 10) sum to a maximum
    overall_match_score of 100. Each ``Annotated`` third argument is the
    field description passed to the model via structured output.
    """
    candidate_name: Annotated[str, ..., "Name of the candidate with the highest score"]
    # NOTE: generate_analysis_new recomputes this field locally as the sum of
    # the four component scores rather than trusting the model's arithmetic.
    overall_match_score: Annotated[int, ..., "sum of scores for skills_keywords_score, experience_score, education_certifications_score, and preferred_qualifications_score (Whole Number)"]
    skills_keywords_score: Annotated[int, ..., "Score for Skills and Keywords (0-40)"]
    skills_keywords_explanation: Annotated[str, ..., "Explanation for Skills and Keywords"]
    experience_score: Annotated[int, ..., "Score for Experience (0-30)"]
    experience_explanation: Annotated[str, ..., "Explanation for Experience"]
    education_certifications_score: Annotated[int, ..., "Score for Education & Certifications (0-20)"]
    education_certifications_explanation: Annotated[str, ..., "Explanation for Education & Certifications"]
    preferred_qualifications_score: Annotated[int, ..., "Score for Preferred Qualifications (0-10)"]
    preferred_qualifications_explanation: Annotated[str, ..., "Explanation for Preferred Qualifications"]
    score_interpretation: Annotated[str, ..., "donot mention any numbers here, just Interpretation in words of the overall_match_score"]
# Use structured output with the LLM
def generate_analysis_new(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Score one resume against a job listing via a structured-output LLM call.

    Args:
        resume_text: Extracted resume text.
        job_listing_text: Job listing text.
        job_title_text: Job title.
        must_have: Must-have requirements string.
        prompt_template: Template with ``resume``, ``job_listing``,
            ``job_title_text`` and ``must_have`` placeholders.

    Returns:
        A ``ResumeAnalysis`` dict. ``overall_match_score`` is recomputed
        locally as the sum of the four component scores rather than
        trusting the model's own arithmetic.
    """
    structured_agent = get_agent_groq().with_structured_output(ResumeAnalysis)
    prompt = prompt_template.format(
        resume=resume_text,
        job_listing=job_listing_text,
        job_title_text=job_title_text,
        must_have=must_have,
    )
    response = structured_agent.invoke(prompt)
    component_keys = (
        'skills_keywords_score',
        'education_certifications_score',
        'experience_score',
        'preferred_qualifications_score',
    )
    response['overall_match_score'] = sum(response[key] for key in component_keys)
    print(response)
    return response
def generate_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Score one resume with a free-form LLM call and parse its fenced-JSON reply.

    Args:
        resume_text: Extracted resume text.
        job_listing_text: Job listing text.
        job_title_text: Job title.
        must_have: Must-have requirements string.
        prompt_template: Template with the standard four placeholders.

    Returns:
        dict: The JSON payload extracted from the model's fenced code block.
    """
    prompt = prompt_template.format(
        resume=resume_text,
        job_listing=job_listing_text,
        job_title_text=job_title_text,
        must_have=must_have,
    )
    reply = get_agent_groq().invoke(prompt)
    return extract(reply.content)
def generate_sel_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_template):
    """Run a per-resume analysis pass, then a selection prompt, and parse the reply.

    Returns:
        list[dict]: Per-candidate dicts parsed by extract_sel (empty list on
        JSON decode failure inside extract_sel).
    """
    prompt_templates = prompts.prompt_template_modern
    # NOTE(review): the return value of this call is discarded — it appears to
    # run only for its side effects (prints / rate-limit sleeps). Also,
    # generate_individual_analysis iterates its first argument; if resume_text
    # is a single string this iterates character-by-character — confirm the
    # caller passes a list here.
    generate_individual_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_templates)
    agent = get_agent_groq()
    response = agent.invoke(prompt_template.format(resume=resume_text, job_listing=job_listing_text,job_title_text=job_title_text,must_have=must_have))
    #print(response.content)
    text_res=extract_sel(response.content)
    #print(text_res)
    return text_res
# Analyzing each resume individually and handling delays to avoid token limits
def generate_individual_analysis(resumes, job_listing_text, job_title_text, must_have, prompt_template, delay=20):
    """Analyze each resume one at a time, pausing between LLM calls.

    Args:
        resumes: Iterable of resume text strings.
        job_listing_text: Job listing text.
        job_title_text: Job title.
        must_have: Must-have requirements string.
        prompt_template: Template with the standard four placeholders.
        delay: Seconds to sleep *between* calls, to stay under the
            tokens-per-minute rate limit.

    Returns:
        list[dict]: Structured ResumeAnalysis dicts (falsy responses skipped).
    """
    all_results = []
    for index, resume_text in enumerate(resumes):
        if index:
            # Sleep only between requests — the original also slept after the
            # final call, wasting `delay` seconds per invocation.
            time.sleep(delay)
        structured_response = generate_analysis_new(
            resume_text, job_listing_text, job_title_text, must_have, prompt_template
        )
        if structured_response:
            all_results.append(structured_response)
    # default=None guards the empty case: bare max([]) raises ValueError when
    # every response was falsy or `resumes` was empty.
    best_match = max(all_results, key=lambda x: x.get("overall_match_score", 0), default=None)
    print('best_match', best_match)
    print('all_results', all_results)
    return all_results
def extract(content):
    """Parse the first triple-backtick fenced block in *content* as JSON.

    Args:
        content: Raw LLM reply text expected to contain a ```-fenced JSON
            payload.

    Returns:
        dict: The decoded JSON object (each key/value echoed to stdout).

    Raises:
        ValueError: If no fenced block is found. (Previously this surfaced
            as an opaque AttributeError from ``re.search(...).group`` on
            ``None``.)
        json.JSONDecodeError: If the fenced block is not valid JSON.
    """
    match = re.search(r'```\n(.*?)\n```', content, re.DOTALL)
    if match is None:
        raise ValueError("no ``` fenced JSON block found in LLM response")
    data = json.loads(match.group(1))
    # Echo the parsed fields for debugging, matching the original output.
    for key, value in data.items():
        print(f"{key}: {value}")
    # The original rebuilt the dict key-by-key; dict(data) is equivalent.
    return dict(data)
def extract_mist(json_string):
    """Decode a raw JSON object string, echoing each key/value pair to stdout.

    Args:
        json_string: A JSON-encoded object.

    Returns:
        dict: A shallow copy of the decoded mapping.
    """
    parsed = json.loads(json_string)
    for field in parsed:
        print(f"{field}: {parsed[field]}")
    return dict(parsed)
def extract_sel(content):
    """Parse per-candidate JSON sections from an LLM reply.

    Expects sections of the form ``**Candidate Name**`` followed by that
    candidate's JSON payload.

    Args:
        content: Raw LLM reply text.

    Returns:
        list[dict]: One decoded dict per candidate, in order of appearance;
        ``[]`` when any JSON section fails to decode.
    """
    try:
        # re.split with a capture group yields:
        # [preamble, name1, body1, name2, body2, ...]
        parts = re.split(r'\*\*(.*?)\*\*', content)
        # zip pairs each name with its following body and silently drops a
        # trailing name with no body — the original indexed parts[i+1] and
        # could raise an uncaught IndexError in that case.
        candidate_json_list = [
            json.loads(body.strip())
            for _name, body in zip(parts[1::2], parts[2::2])
        ]
        return candidate_json_list
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return []
def generate_adv(job_listing_text, job_title_text, prompt_template):
    """Generate a job advertisement for a listing via the Groq agent.

    Args:
        job_listing_text: Job listing text.
        job_title_text: Job title.
        prompt_template: Template with ``job_listing`` and
            ``job_title_text`` placeholders.

    Returns:
        str: The agent's reply text (also echoed to stdout).
    """
    agent = get_agent_groq()
    formatted_prompt = prompt_template.format(
        job_listing=job_listing_text, job_title_text=job_title_text
    )
    advert_text = agent.invoke(formatted_prompt).content
    print(advert_text)
    return advert_text