Spaces:
Sleeping
Sleeping
File size: 6,836 Bytes
416760a e21cf92 416760a e21cf92 416760a e21cf92 416760a e21cf92 416760a e21cf92 416760a e21cf92 416760a e21cf92 416760a e21cf92 eb7a184 e21cf92 416760a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
from PyPDF2 import PdfReader
from agents.agents import get_agent_groq
import json
import re
import time
from agents import prompts
def parse_resume(path):
    """Extract the full text of a single PDF resume.

    Args:
        path: A filesystem path or file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        str: The concatenated text of every page, in page order.

    Note:
        ``Page.extract_text()`` returns ``None`` for pages with no
        extractable text (e.g. scanned images); the ``or ''`` guard keeps
        such pages from raising ``TypeError`` on concatenation.
    """
    reader = PdfReader(path)
    print(len(reader.pages))  # debug: page count
    return ''.join((page.extract_text() or '') for page in reader.pages)
def parse_resumes(resumes_list):
    """Extract text from a batch of PDF resumes.

    Args:
        resumes_list: Iterable of paths or file-like objects readable by
            ``PyPDF2.PdfReader``.

    Returns:
        list[str]: One extracted-text string per input resume, in order.
    """
    resumes_text = []
    for resume in resumes_list:
        reader = PdfReader(resume)
        # extract_text() may return None for image-only pages; the
        # ``or ''`` guard avoids a TypeError when joining.
        text = ''.join((page.extract_text() or '') for page in reader.pages)
        resumes_text.append(text)
    return resumes_text
def parse_(resumes_list):
    """Extract text from each resume by delegating to :func:`parse_resume`.

    Args:
        resumes_list: Iterable of PDF paths/file-like objects.

    Returns:
        list[str]: Extracted text per resume, preserving input order.
    """
    return [parse_resume(item) for item in resumes_list]
from typing_extensions import Annotated, TypedDict, Optional
# Define TypedDict for structured output
class ResumeAnalysis(TypedDict):
    """Structured scoring schema the LLM must fill in for one candidate.

    The four component scores (40 + 30 + 20 + 10) sum to a maximum
    overall_match_score of 100. Each ``Annotated`` third argument is the
    field description passed to the model via structured output.
    """
    candidate_name: Annotated[str, ..., "Name of the candidate with the highest score"]
    # NOTE: generate_analysis_new recomputes this field locally as the sum of
    # the four component scores rather than trusting the model's arithmetic.
    overall_match_score: Annotated[int, ..., "sum of scores for skills_keywords_score, experience_score, education_certifications_score, and preferred_qualifications_score (Whole Number)"]
    skills_keywords_score: Annotated[int, ..., "Score for Skills and Keywords (0-40)"]
    skills_keywords_explanation: Annotated[str, ..., "Explanation for Skills and Keywords"]
    experience_score: Annotated[int, ..., "Score for Experience (0-30)"]
    experience_explanation: Annotated[str, ..., "Explanation for Experience"]
    education_certifications_score: Annotated[int, ..., "Score for Education & Certifications (0-20)"]
    education_certifications_explanation: Annotated[str, ..., "Explanation for Education & Certifications"]
    preferred_qualifications_score: Annotated[int, ..., "Score for Preferred Qualifications (0-10)"]
    preferred_qualifications_explanation: Annotated[str, ..., "Explanation for Preferred Qualifications"]
    score_interpretation: Annotated[str, ..., "donot mention any numbers here, just Interpretation in words of the overall_match_score"]
# Use structured output with the LLM
def generate_analysis_new(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Score one resume against a job listing via a structured-output LLM call.

    Args:
        resume_text: Extracted resume text.
        job_listing_text: Job listing text.
        job_title_text: Job title.
        must_have: Must-have requirements string.
        prompt_template: Template with ``resume``, ``job_listing``,
            ``job_title_text`` and ``must_have`` placeholders.

    Returns:
        A ``ResumeAnalysis`` dict. ``overall_match_score`` is recomputed
        locally as the sum of the four component scores rather than
        trusting the model's own arithmetic.
    """
    structured_agent = get_agent_groq().with_structured_output(ResumeAnalysis)
    prompt = prompt_template.format(
        resume=resume_text,
        job_listing=job_listing_text,
        job_title_text=job_title_text,
        must_have=must_have,
    )
    response = structured_agent.invoke(prompt)
    component_keys = (
        'skills_keywords_score',
        'education_certifications_score',
        'experience_score',
        'preferred_qualifications_score',
    )
    response['overall_match_score'] = sum(response[key] for key in component_keys)
    print(response)
    return response
def generate_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
    """Score one resume with a free-form LLM call and parse its fenced-JSON reply.

    Args:
        resume_text: Extracted resume text.
        job_listing_text: Job listing text.
        job_title_text: Job title.
        must_have: Must-have requirements string.
        prompt_template: Template with the standard four placeholders.

    Returns:
        dict: The JSON payload extracted from the model's fenced code block.
    """
    prompt = prompt_template.format(
        resume=resume_text,
        job_listing=job_listing_text,
        job_title_text=job_title_text,
        must_have=must_have,
    )
    reply = get_agent_groq().invoke(prompt)
    return extract(reply.content)
def generate_sel_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_template):
    """Run a per-resume analysis pass, then a selection prompt, and parse the reply.

    Returns:
        list[dict]: Per-candidate dicts parsed by extract_sel (empty list on
        JSON decode failure inside extract_sel).
    """
    prompt_templates = prompts.prompt_template_modern
    # NOTE(review): the return value of this call is discarded — it appears to
    # run only for its side effects (prints / rate-limit sleeps). Also,
    # generate_individual_analysis iterates its first argument; if resume_text
    # is a single string this iterates character-by-character — confirm the
    # caller passes a list here.
    generate_individual_analysis(resume_text, job_listing_text,job_title_text, must_have,prompt_templates)
    agent = get_agent_groq()
    response = agent.invoke(prompt_template.format(resume=resume_text, job_listing=job_listing_text,job_title_text=job_title_text,must_have=must_have))
    #print(response.content)
    text_res=extract_sel(response.content)
    #print(text_res)
    return text_res
# Analyzing each resume individually and handling delays to avoid token limits
def generate_individual_analysis(resumes, job_listing_text, job_title_text, must_have, prompt_template, delay=20):
    """Analyze each resume one at a time, pausing between LLM calls.

    Args:
        resumes: Iterable of resume text strings.
        job_listing_text: Job listing text.
        job_title_text: Job title.
        must_have: Must-have requirements string.
        prompt_template: Template with the standard four placeholders.
        delay: Seconds to sleep *between* calls, to stay under the
            tokens-per-minute rate limit.

    Returns:
        list[dict]: Structured ResumeAnalysis dicts (falsy responses skipped).
    """
    all_results = []
    for index, resume_text in enumerate(resumes):
        if index:
            # Sleep only between requests — the original also slept after the
            # final call, wasting `delay` seconds per invocation.
            time.sleep(delay)
        structured_response = generate_analysis_new(
            resume_text, job_listing_text, job_title_text, must_have, prompt_template
        )
        if structured_response:
            all_results.append(structured_response)
    # default=None guards the empty case: bare max([]) raises ValueError when
    # every response was falsy or `resumes` was empty.
    best_match = max(all_results, key=lambda x: x.get("overall_match_score", 0), default=None)
    print('best_match', best_match)
    print('all_results', all_results)
    return all_results
def extract(content):
    """Parse the first triple-backtick fenced block in *content* as JSON.

    Args:
        content: Raw LLM reply text expected to contain a ```-fenced JSON
            payload.

    Returns:
        dict: The decoded JSON object (each key/value echoed to stdout).

    Raises:
        ValueError: If no fenced block is found. (Previously this surfaced
            as an opaque AttributeError from ``re.search(...).group`` on
            ``None``.)
        json.JSONDecodeError: If the fenced block is not valid JSON.
    """
    match = re.search(r'```\n(.*?)\n```', content, re.DOTALL)
    if match is None:
        raise ValueError("no ``` fenced JSON block found in LLM response")
    data = json.loads(match.group(1))
    # Echo the parsed fields for debugging, matching the original output.
    for key, value in data.items():
        print(f"{key}: {value}")
    # The original rebuilt the dict key-by-key; dict(data) is equivalent.
    return dict(data)
def extract_mist(json_string):
    """Decode a raw JSON object string, echoing each key/value pair to stdout.

    Args:
        json_string: A JSON-encoded object.

    Returns:
        dict: A shallow copy of the decoded mapping.
    """
    parsed = json.loads(json_string)
    for field in parsed:
        print(f"{field}: {parsed[field]}")
    return dict(parsed)
def extract_sel(content):
    """Parse per-candidate JSON sections from an LLM reply.

    Expects sections of the form ``**Candidate Name**`` followed by that
    candidate's JSON payload.

    Args:
        content: Raw LLM reply text.

    Returns:
        list[dict]: One decoded dict per candidate, in order of appearance;
        ``[]`` when any JSON section fails to decode.
    """
    try:
        # re.split with a capture group yields:
        # [preamble, name1, body1, name2, body2, ...]
        parts = re.split(r'\*\*(.*?)\*\*', content)
        # zip pairs each name with its following body and silently drops a
        # trailing name with no body — the original indexed parts[i+1] and
        # could raise an uncaught IndexError in that case.
        candidate_json_list = [
            json.loads(body.strip())
            for _name, body in zip(parts[1::2], parts[2::2])
        ]
        return candidate_json_list
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return []
def generate_adv(job_listing_text, job_title_text, prompt_template):
    """Generate a job advertisement for a listing via the Groq agent.

    Args:
        job_listing_text: Job listing text.
        job_title_text: Job title.
        prompt_template: Template with ``job_listing`` and
            ``job_title_text`` placeholders.

    Returns:
        str: The agent's reply text (also echoed to stdout).
    """
    agent = get_agent_groq()
    formatted_prompt = prompt_template.format(
        job_listing=job_listing_text, job_title_text=job_title_text
    )
    advert_text = agent.invoke(formatted_prompt).content
    print(advert_text)
    return advert_text