File size: 6,836 Bytes
416760a
 
 
 
e21cf92
 
416760a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e21cf92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416760a
 
 
 
e21cf92
416760a
 
 
e21cf92
416760a
 
 
 
e21cf92
 
416760a
 
 
e21cf92
416760a
e21cf92
416760a
 
e21cf92
 
eb7a184
e21cf92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416760a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
from PyPDF2 import PdfReader
from agents.agents import get_agent_groq
import json
import re
import time
from agents import prompts 


def parse_resume(path):
    """Extract the full text of a single PDF resume.

    Args:
        path: File path or file-like object accepted by PyPDF2's PdfReader.

    Returns:
        str: Concatenated text of every page. Pages with no extractable
        text (e.g. scanned images) contribute an empty string.
    """
    reader = PdfReader(path)
    print(len(reader.pages))
    # extract_text() may return None for image-only pages; coalesce to ""
    # so concatenation does not raise TypeError. join avoids quadratic +=.
    return ''.join(page.extract_text() or '' for page in reader.pages)
def parse_resumes(resumes_list):
    """Extract text from each PDF resume in *resumes_list*.

    Args:
        resumes_list: Iterable of paths/file-like objects readable by PdfReader.

    Returns:
        list[str]: One extracted-text string per input resume, in order.
    """
    resumes_text = []
    for resume in resumes_list:
        reader = PdfReader(resume)
        # extract_text() can return None for image-only pages; treat as ""
        # so the join does not raise TypeError.
        text = ''.join(page.extract_text() or '' for page in reader.pages)
        resumes_text.append(text)
    return resumes_text

def parse_(resumes_list):
    """Extract text from every resume by delegating to parse_resume().

    Args:
        resumes_list: Iterable of PDF paths/file-like objects.

    Returns:
        list[str]: Extracted text per resume, in input order.
    """
    return [parse_resume(resume) for resume in resumes_list]


from typing_extensions import Annotated, TypedDict, Optional

# Define TypedDict for structured output
class ResumeAnalysis(TypedDict):
    """Structured scoring rubric the LLM must return for one candidate.

    The Annotated description strings are part of the structured-output
    schema sent to the model — do not edit them casually. Component scores
    are weighted: skills (0-40) + experience (0-30) + education (0-20) +
    preferred (0-10); overall_match_score is their sum (recomputed locally
    in generate_analysis_new).
    """
    candidate_name: Annotated[str, ..., "Name of the candidate with the highest score"]
    overall_match_score: Annotated[int, ..., "sum of scores for skills_keywords_score, experience_score, education_certifications_score, and preferred_qualifications_score (Whole Number)"]
    skills_keywords_score: Annotated[int, ..., "Score for Skills and Keywords (0-40)"]
    skills_keywords_explanation: Annotated[str, ..., "Explanation for Skills and Keywords"]
    experience_score: Annotated[int, ..., "Score for Experience (0-30)"]
    experience_explanation: Annotated[str, ..., "Explanation for Experience"]
    education_certifications_score: Annotated[int, ..., "Score for Education & Certifications (0-20)"]
    education_certifications_explanation: Annotated[str, ..., "Explanation for Education & Certifications"]
    preferred_qualifications_score: Annotated[int, ..., "Score for Preferred Qualifications (0-10)"]
    preferred_qualifications_explanation: Annotated[str, ..., "Explanation for Preferred Qualifications"]
    score_interpretation: Annotated[str, ..., "donot mention any numbers here, just Interpretation in words of the overall_match_score"]

# Use structured output with the LLM

def generate_analysis_new(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Score one resume against a job listing via a structured-output LLM call.

    Args:
        resume_text: Extracted resume text.
        job_listing_text: Full job listing text.
        job_title_text: The job title.
        must_have: Must-have requirements to weigh in scoring.
        prompt_template: str.format-style template with resume, job_listing,
            job_title_text and must_have placeholders.

    Returns:
        dict: A ResumeAnalysis-shaped mapping with overall_match_score
        recomputed locally from the four component scores.
    """
    agent = get_agent_groq().with_structured_output(ResumeAnalysis)
    prompt = prompt_template.format(
        resume=resume_text,
        job_listing=job_listing_text,
        job_title_text=job_title_text,
        must_have=must_have,
    )
    response = agent.invoke(prompt)
    # Recompute the total locally rather than trusting the model's own sum.
    component_keys = (
        'skills_keywords_score',
        'education_certifications_score',
        'experience_score',
        'preferred_qualifications_score',
    )
    response['overall_match_score'] = sum(response[key] for key in component_keys)
    print(response)
    return response

def generate_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_template):
    """Score a resume with a plain (unstructured) LLM call.

    The model is expected to reply with a fenced ``` JSON block, which is
    parsed into a dict by extract().

    Args:
        resume_text: Extracted resume text.
        job_listing_text: Full job listing text.
        job_title_text: The job title.
        must_have: Must-have requirements supplied by the user.
        prompt_template: str.format-style template for the scoring prompt.

    Returns:
        dict: Parsed analysis fields from the model's JSON reply.
    """
    agent = get_agent_groq()
    prompt = prompt_template.format(
        resume=resume_text,
        job_listing=job_listing_text,
        job_title_text=job_title_text,
        must_have=must_have,
    )
    reply = agent.invoke(prompt)
    return extract(reply.content)

def generate_sel_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_template):
    """Run the per-resume analysis pass, then make one combined LLM call to
    compare/select candidates and parse the per-candidate JSON sections.

    Args:
        resume_text: Resume text — presumably a list of resume strings here,
            since it is forwarded to generate_individual_analysis; TODO confirm.
        job_listing_text: Full job listing text.
        job_title_text: The job title.
        must_have: Must-have requirements supplied by the user.
        prompt_template: Template for the combined selection call.

    Returns:
        list[dict]: One parsed analysis dict per candidate (see extract_sel),
        or [] if the reply's JSON could not be decoded.
    """
    prompt_templates = prompts.prompt_template_modern
    # NOTE(review): the return value of this pass is discarded, yet it still
    # performs one LLM call per resume plus a sleep between calls. Confirm
    # whether this side-effect-only call is intentional or leftover.
    generate_individual_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_templates)
      #chain = prompt | agent
    agent = get_agent_groq()
    response = agent.invoke(prompt_template.format(resume=resume_text, job_listing=job_listing_text,job_title_text=job_title_text,must_have=must_have))
    #print(response.content)
    # The combined reply is expected as alternating **Name** headers and JSON
    # blocks, which extract_sel splits and decodes.
    text_res=extract_sel(response.content)
    #print(text_res)
    return text_res


# Analyzing each resume individually and handling delays to avoid token limits
def generate_individual_analysis(resumes, job_listing_text, job_title_text, must_have, prompt_template, delay=20):
    #agent = get_agent_groq()
    all_results = []

    for resume_text in resumes:
        structured_response= generate_analysis_new(resume_text, job_listing_text, job_title_text, must_have, prompt_template)
        #agent =  get_agent_groq().with_structured_output(ResumeAnalysis)
       # print(response)
        if structured_response:
            all_results.append(structured_response)

        # Adding delay to avoid the 6000 tokens per minute limit
        time.sleep(delay)

    # Sorting results by match score (or any other criteria you prefer)
    best_match = max(all_results, key=lambda x: x.get("overall_match_score", 0))
    print('best_match',best_match)
    print('all_results',all_results)
    return all_results

def extract(content):
    """Parse the JSON object inside a fenced ``` block of an LLM reply.

    Args:
        content: Raw reply text expected to contain a ```-fenced JSON object.

    Returns:
        dict: The decoded JSON object (each field is also echoed to stdout).

    Raises:
        ValueError: If no fenced block is found. (Previously this crashed
            with an opaque AttributeError on ``None.group``.)
        json.JSONDecodeError: If the fenced text is not valid JSON.
    """
    match = re.search(r'```\n(.*?)\n```', content, re.DOTALL)
    if match is None:
        raise ValueError("no fenced JSON block found in LLM response")

    data = json.loads(match.group(1))
    # Echo the extracted fields for debugging visibility.
    for key, value in data.items():
        print(f"{key}: {value}")
    return dict(data)
def extract_mist(json_string):
    """Decode a raw JSON object string, echoing each field to stdout.

    Args:
        json_string: A string containing a single JSON object.

    Returns:
        dict: The decoded key/value pairs.

    Raises:
        json.JSONDecodeError: If the input is not valid JSON.
    """
    parsed = json.loads(json_string)
    result = {}
    for field, val in parsed.items():
        print(f"{field}: {val}")
        result[field] = val
    return result


def extract_sel(content):
    """Parse per-candidate JSON sections from an LLM selection reply.

    The reply is expected to alternate **Candidate Name** headers (bold
    markdown) with JSON bodies. Splitting on the bold pattern yields
    ``[preamble, name1, body1, name2, body2, ...]``.

    Args:
        content: Raw reply text.

    Returns:
        list[dict]: One decoded JSON object per candidate, or [] if any
        body fails to decode.
    """
    try:
        sections = re.split(r'\*\*(.*?)\*\*', content)
        candidate_json_list = []
        # Stop before len-1 so a trailing **Name** with no JSON body is
        # skipped instead of raising IndexError on sections[i + 1], which
        # the old code did not catch.
        for i in range(1, len(sections) - 1, 2):
            json_string = sections[i + 1].strip()
            candidate_json_list.append(json.loads(json_string))
        return candidate_json_list

    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return []

def generate_adv(job_listing_text,job_title_text, prompt_template):
    """Make a single LLM call built from the job listing and title;
    print and return the raw reply text.

    Args:
        job_listing_text: Full job listing text.
        job_title_text: The job title.
        prompt_template: str.format-style template with job_listing and
            job_title_text placeholders.

    Returns:
        str: The model's reply content, unparsed.
    """
    agent = get_agent_groq()
    prompt = prompt_template.format(job_listing=job_listing_text, job_title_text=job_title_text)
    response = agent.invoke(prompt)
    text = response.content
    print(text)
    return text