datasciencedojo committed on
Commit
416760a
1 Parent(s): 4c56ef8

Update utils/utils.py

Files changed (1)
  1. utils/utils.py +97 -0
utils/utils.py CHANGED
@@ -0,0 +1,97 @@
+ from PyPDF2 import PdfReader
+ from agents.agents import get_agent_groq
+ import json
+ import re
+ 
+ 
+ def parse_resume(path):
+     """Read a single PDF resume and return its concatenated page text."""
+     loader = PdfReader(path)
+     text = ''
+     for page in loader.pages:
+         text += page.extract_text()
+     return text
+ 
+ 
+ def parse_resumes(resumes_list):
+     """Read a list of PDF resumes and return one text string per resume."""
+     resumes_text = []
+     for resume in resumes_list:
+         loader = PdfReader(resume)
+         text = ''
+         for page in loader.pages:
+             text += page.extract_text()
+         resumes_text.append(text)
+     return resumes_text
+ 
+ 
+ def generate_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
+     """Run the Groq agent on a single resume and return the parsed JSON analysis."""
+     agent = get_agent_groq()
+     resp = agent.invoke(prompt_template.format(
+         resume=resume_text,
+         job_listing=job_listing_text,
+         job_title_text=job_title_text,
+         must_have=must_have,
+     ))
+     text_res = extract(resp.content)
+     print(text_res)
+     return text_res
+ 
+ 
+ def generate_sel_analysis(resume_text, job_listing_text, job_title_text, must_have, prompt_template):
+     """Run the Groq agent for candidate selection and return one dict per candidate."""
+     agent = get_agent_groq()
+     response = agent.invoke(prompt_template.format(
+         resume=resume_text,
+         job_listing=job_listing_text,
+         job_title_text=job_title_text,
+         must_have=must_have,
+     ))
+     print(response.content)
+     text_res = extract_sel(response.content)
+     print(text_res)
+     return text_res
+ 
+ 
+ def extract(content):
+     """Pull the fenced JSON block out of a model response and return it as a dict."""
+     json_pattern = r'```\n(.*?)\n```'
+     match = re.search(json_pattern, content, re.DOTALL)
+     if match is None:
+         print("No fenced JSON block found in the response.")
+         return {}
+ 
+     # Load the extracted JSON string into a dictionary
+     data = json.loads(match.group(1))
+     new = {}
+     # Print the extracted variables and their values
+     for key, value in data.items():
+         print(f"{key}: {value}")
+         new[key] = value
+     return new
+ 
+ 
+ def extract_mist(json_string):
+     """Parse a raw JSON string into a dictionary, printing each field."""
+     data = json.loads(json_string)
+     new = {}
+     # Print the extracted variables and their values
+     for key, value in data.items():
+         print(f"{key}: {value}")
+         new[key] = value
+     return new
+ 
+ 
+ def extract_sel(content):
+     """Split a multi-candidate response on bolded names and parse each JSON section."""
+     try:
+         # Split the content on the bolded candidate names; the result alternates
+         # between candidate names and their JSON sections
+         candidates = re.split(r'\*\*(.*?)\*\*', content)
+ 
+         candidate_json_list = []
+         for i in range(1, len(candidates), 2):
+             candidate_name = candidates[i].strip()   # candidate name (currently unused)
+             json_string = candidates[i + 1].strip()  # JSON section for this candidate
+ 
+             # Load the JSON string into a dictionary
+             candidate_data = json.loads(json_string)
+             candidate_json_list.append(candidate_data)
+ 
+         return candidate_json_list
+ 
+     except json.JSONDecodeError as e:
+         print(f"Error decoding JSON: {e}")
+         return []
+ 
+ 
+ def generate_adv(job_listing_text, job_title_text, prompt_template):
+     """Run the Groq agent on the job listing alone and return the raw response text."""
+     agent = get_agent_groq()
+     resp = agent.invoke(prompt_template.format(
+         job_listing=job_listing_text,
+         job_title_text=job_title_text,
+     ))
+     text = resp.content
+     print(text)
+     return text