Vaibhav84 committed on
Commit
ce5ba9e
·
1 Parent(s): 2885677
Files changed (2) hide show
  1. SkillExtract_Backup.py +236 -0
  2. SkillMatcher.py +108 -0
SkillExtract_Backup.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import psycopg2
2
+ from psycopg2 import sql
3
+ import pandas as pd
4
+ import re
5
class SkillExtractorDetails:
    """Extract skills from job descriptions and persist them in PostgreSQL.

    Every public method is a static method that receives ``db_params``, a
    mapping of keyword arguments forwarded to ``psycopg2.connect``.  The
    tables touched are ``skillmaster``, ``jdSkilldetails``, ``JDMaster`` and
    ``SkillDetails``.
    """

    @staticmethod
    def _link_skill_to_jd(cursor, skill_id, jd_master_id):
        """Insert a (skill, JD) row into ``jdSkilldetails`` unless it exists.

        The caller owns the transaction and is responsible for committing.

        Returns:
            True when a row was inserted, False when the link already existed.
        """
        cursor.execute(
            "SELECT skillid FROM jdSkilldetails WHERE skillid IN (%s) and jdMasterid in (%s)",
            (skill_id, jd_master_id),
        )
        if cursor.fetchone() is not None:
            return False
        cursor.execute(
            "INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)",
            (skill_id, jd_master_id),
        )
        return True

    @staticmethod
    def GetSkillId(skillname, jdmasterid, db_params):
        """Look up a skill by name (case-insensitive) and link it to a JD.

        Args:
            skillname: Skill text to look up in ``skillmaster.skilldetails``.
            jdmasterid: Id of the job description the skill belongs to.
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            The ``skillid`` of the first matching row, or ``None`` when the
            skill is not present in ``skillmaster`` (nothing is inserted then).
        """
        conn = psycopg2.connect(**db_params)
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(
                    "select skillid from skillmaster where upper(skilldetails) = (%s)",
                    (skillname.upper(),),
                )
                found = cursor.fetchone()
                if found is None:
                    # Previously this crashed on ``None[0]``; report "unknown" instead.
                    return None
                generated_skill_id = found[0]
                if SkillExtractorDetails._link_skill_to_jd(cursor, generated_skill_id, jdmasterid):
                    conn.commit()
                return generated_skill_id
            finally:
                cursor.close()
        finally:
            # Always release the connection, including on query errors.
            conn.close()

    @staticmethod
    def skill_Validate(df, skill):
        """Tell whether ``skill`` appears in ``df['skilldetails']``.

        A single-word (or empty) skill must equal an entry exactly; a
        multi-word skill only has to occur as a substring of an entry.  The
        comparison upper-cases ``skill`` and expects the column to be
        upper-cased already (as ``getNewSkills`` does).

        The skill text is compared literally, so names containing regex
        metacharacters such as ``C++`` or ``.NET`` are handled correctly, and
        ``df`` is not modified.

        Returns:
            1 when the skill is present, 0 otherwise.
        """
        skill = skill.upper()
        words = skill.split()
        known_skills = df['skilldetails']
        if len(words) == 1 or (not words and len(skill) < 3):
            return 1 if (known_skills == skill).any() else 0
        # na=False: NULL skill names never count as a match.
        if known_skills.str.contains(skill, regex=False, na=False).any():
            return 1
        return 0

    @staticmethod
    def getNewSkills(db_params):
        """Load the ``skillmaster`` rows whose ``weightage`` is -2.

        Returns:
            A DataFrame with columns ``skillid``, ``skilldetails`` (upper-cased),
            ``skilltype`` and ``skill_score``.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = psycopg2.connect(**db_params)
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def extractWords(job_description, JdMasterid, db_params):
        """Scan a job description token by token for known skills.

        The text is split on commas, spaces, semicolons, newlines and slashes
        (closing parentheses are treated as spaces).  Each token found by
        ``skill_Validate`` in the ``getNewSkills`` list is linked to the JD via
        ``GetSkillId``.

        Returns:
            The list of tokens recognised as skills.
        """
        job_roles = []
        job_description = job_description.replace(')', ' ')
        delimiters = ",", " ", " , ", ";", "\n", "/", "\\"
        regex_pattern = '|'.join(map(re.escape, delimiters))
        df = SkillExtractorDetails.getNewSkills(db_params)
        for ds in re.split(regex_pattern, job_description):
            token = ds.strip()
            if not token:
                # Consecutive delimiters yield empty tokens; nothing to look up.
                continue
            try:
                if SkillExtractorDetails.skill_Validate(df, token):
                    job_roles.append(ds)
                    skill_id = SkillExtractorDetails.GetSkillId(token, JdMasterid, db_params)
                    if skill_id is not None:
                        print("Skills Identified* : " + ds)
            except Exception as error:
                # Best effort: one bad token must not abort the scan, but the
                # failure is reported instead of being silently discarded.
                print("Skipping token '" + token + "' : " + str(error))
        return job_roles

    @staticmethod
    def _extract_skills_for_jd(conn, cursor, skill_extractor, jd_row, position, db_params):
        """Annotate one JD row, store its skills and mark it as extracted."""
        # Plain int: psycopg2 cannot adapt numpy integer types as parameters.
        id_value = int(jd_row['jdmasterid'])
        filename_jd = jd_row['filename']
        description_value = jd_row['jobdescription']
        print("Extracting Skills For ", filename_jd + " , Id : " + str(id_value) + " , Index " + str(position))

        annotations = skill_extractor.annotate(description_value)
        matches = annotations['results']['full_matches'] + annotations['results']['ngram_scored']
        skills_list = []
        for match in matches:
            skill_info = skill_extractor.skills_db[match['skill_id']]
            # Drop any parenthesised qualifier from the skill name.
            skill_name = skill_info['skill_name'].split("(")[0].strip()
            if skill_name in skills_list:
                continue
            skills_list.append(skill_name)

            cursor.execute("SELECT skillid FROM skillmaster WHERE skillDetails IN (%s)", (skill_name,))
            known_ids = [int(found[0]) for found in cursor.fetchall()]
            if known_ids:
                print("Skill Identified : ", skill_name)
                for known_id in known_ids:
                    if SkillExtractorDetails._link_skill_to_jd(cursor, known_id, id_value):
                        conn.commit()
            else:
                skill_score = str(round(match['score'], 2) * 100)
                cursor.execute(
                    """INSERT INTO SkillMaster (SkillDetails, SkillType, Weightage, IsActive, skill_score)
                    VALUES (%s, %s, %s, %s, %s) RETURNING SkillID""",
                    (skill_name, skill_info['skill_type'], -1.0, True, skill_score),
                )
                # Read the generated id before committing, then store the new
                # skill and its JD link in a single transaction.
                generated_skill_id = cursor.fetchone()[0]
                cursor.execute(
                    "INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)",
                    (generated_skill_id, id_value),
                )
                conn.commit()
                print("Skill Identified : ", skill_name)

        # extractWords uses its own connections, so everything above must
        # already be committed for its duplicate checks to see it.
        SkillExtractorDetails.extractWords(description_value, id_value, db_params)
        cursor.execute(
            "update public.jdmaster set isskillsextracted = 1 where jdmasterid = (%s)",
            (id_value,),
        )
        conn.commit()
        print("Skills Updated for Skills Extraction for file ", filename_jd)
        print("Total Skills : ", len(skills_list))

    @staticmethod
    def SkillExtract(db_params, skill_extractor, JdID):
        """Extract and store the skills of one not-yet-processed job description.

        Args:
            db_params: Keyword arguments for ``psycopg2.connect``.
            skill_extractor: Object exposing ``annotate(text)`` (returning a
                dict with ``results.full_matches`` and ``results.ngram_scored``)
                and a ``skills_db`` mapping of skill id to
                ``skill_name``/``skill_type``.
            JdID: ``jdmasterid`` of the job description; must be convertible to
                ``int``.

        Returns:
            The string produced by ``latestSkillDetails`` for ``JdID``.  This is
            also returned when the JD was already extracted or does not exist,
            so the function no longer depends on a row having been processed.
        """
        print("Extracting Skills for the JD...")
        jd_id = int(JdID)
        conn = psycopg2.connect(**db_params)
        try:
            cursor = conn.cursor()
            try:
                query = (
                    "select jdmasterid,jobdescription,filename from JDMaster "
                    "where isskillsextracted = 0 and jdmasterid = %s"
                )
                df = pd.read_sql_query(query, conn, params=(jd_id,))
                if len(df.index) > 0:
                    print("Total JDs for Extractraction : " + str(len(df.index)))
                for position, (_, jd_row) in enumerate(df.iterrows(), start=1):
                    SkillExtractorDetails._extract_skills_for_jd(
                        conn, cursor, skill_extractor, jd_row, position, db_params
                    )
            finally:
                cursor.close()
        finally:
            conn.close()
        return SkillExtractorDetails.latestSkillDetails(jd_id, db_params)

    @staticmethod
    def latestSkillDetails(jid, db_params):
        """Return the ``display_skills`` summary string for JD ``jid``."""
        return SkillExtractorDetails.display_skills(jid, db_params)

    @staticmethod
    def tuple_to_int(tup):
        """Combine a tuple of digits into one number, e.g. ``(1, 2, 3)`` -> 123.

        A one-element tuple returns its element unchanged; an empty tuple
        raises ``IndexError``.
        """
        total = tup[0]
        for digit in tup[1:]:
            total = total * 10 + digit
        return total

    @staticmethod
    def skill_check(dbQuery, db_params, params=None):
        """Run ``dbQuery`` and join its ``skillname`` column with ``', '``.

        Args:
            dbQuery: SQL text whose result has a ``skillname`` column.
            db_params: Keyword arguments for ``psycopg2.connect``.
            params: Optional query parameters for ``%s`` placeholders in
                ``dbQuery``.

        Returns:
            The comma-separated skill names; an empty string for no rows.
        """
        conn = psycopg2.connect(**db_params)
        try:
            df = pd.read_sql_query(dbQuery, conn, params=params)
        finally:
            conn.close()
        return ', '.join(df['skillname'].dropna().astype(str))

    @staticmethod
    def display_skills(id, db_params):
        """Print and return the skills stored in ``SkillDetails`` for one JD.

        Args:
            id: JD identifier; must be convertible to ``int``.
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            ``"<hard>@<soft>@<good-to-have soft> <good-to-have hard>"`` where
            each part is a comma-separated list of skill names.
        """
        jd = int(id)
        base_query = "select skillname from SkillDetails where id = %s and "
        # NOTE(review): a Soft Skill scoring exactly 50 and a Hard Skill
        # scoring exactly 99 fall into no bucket -- confirm the thresholds.
        RequiredSkills_Hard = SkillExtractorDetails.skill_check(
            base_query + "skillscore > 99 and skilltype = 'Hard Skill'", db_params, (jd,))
        RequiredSkills_Soft = SkillExtractorDetails.skill_check(
            base_query + "skillscore > 50 and skilltype = 'Soft Skill'", db_params, (jd,))
        RequiredSkills_G1 = SkillExtractorDetails.skill_check(
            base_query + "skillscore < 50 and skilltype = 'Soft Skill'", db_params, (jd,))
        RequiredSkills_G2 = SkillExtractorDetails.skill_check(
            base_query + "skillscore < 99 and skilltype = 'Hard Skill'", db_params, (jd,))

        good_to_have = RequiredSkills_G1 + " " + RequiredSkills_G2
        print('')
        print("Required Skills : " + RequiredSkills_Hard)
        print('')
        print("Required Soft Skills : " + RequiredSkills_Soft)
        print('')
        print("Good to have Skills : " + good_to_have)
        return RequiredSkills_Hard + "@" + RequiredSkills_Soft + "@" + good_to_have
SkillMatcher.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import psycopg2
2
+ import pandas as pd
3
+ from sentence_transformers import SentenceTransformer, util
4
class SkillMatch:
    """Score courses against a job description and store the results."""

    @staticmethod
    def _group_by_first_column(rows):
        """Map each row's first column to the list of its second-column values."""
        grouped = {}
        for row in rows:
            grouped.setdefault(row[0], []).append(row[1])
        return grouped

    @staticmethod
    def SkillMatcher(model, db_params, jdID):
        """Match every course against one JD and return the best course's name.

        Each (JD, course) pair not yet present in ``courseskillmatch`` is
        scored with the cosine similarity of the sentence embeddings of their
        space-joined skill lists, and inserted into ``courseskillmatch``.

        Args:
            model: Object exposing ``encode(text, convert_to_tensor=True)``.
            db_params: Keyword arguments for ``psycopg2.connect``.
            jdID: ``jdmasterid`` of the job description; str or int,
                convertible to ``int``.

        Returns:
            The ``coursemaster.filename`` of the best newly scored course, cut
            at its first ``'.'``.  ``None`` when no pair was newly scored with a
            positive similarity or the course has no filename (the original
            code crashed in that situation).
        """
        print("Checking Best Course for the JD...")
        jd_id = int(jdID)
        conn = psycopg2.connect(**db_params)
        try:
            cursor_obj = conn.cursor()
            try:
                cursor_obj.execute("select * from JDDetailsAllSkill where jdmasterid = %s", (jd_id,))
                jd_data = cursor_obj.fetchall()
                print(jd_data)
                cursor_obj.execute("select * from CourseDetailsForMatching")
                cv_data = cursor_obj.fetchall()
                print(cv_data)
                cursor_obj.execute("select jdmasterid || '-' || courseid from courseskillmatch")
                # Set membership replaces a linear scan per (JD, course) pair.
                existing_pairs = {row[0] for row in cursor_obj.fetchall()}

                jd_skills = SkillMatch._group_by_first_column(jd_data)
                cv_skills = SkillMatch._group_by_first_column(cv_data)

                MatchSkillsId = 0
                TopScore = 0
                CourseId = 0
                for jd, jd_skill_names in jd_skills.items():
                    jd_embedding = None
                    for cv, cv_skill_names in cv_skills.items():
                        match_details = str(jd) + "-" + str(cv)
                        print("Checking for existing Profile")
                        if match_details in existing_pairs:
                            print("Already in Database -----------" + match_details)
                            continue

                        print("Running Matching Algo")
                        if jd_embedding is None:
                            # Encode the JD once, and only if a pair needs scoring.
                            jd_embedding = model.encode(" ".join(jd_skill_names), convert_to_tensor=True)
                        cv_embedding = model.encode(" ".join(cv_skill_names), convert_to_tensor=True)

                        # Cosine similarity of the two embeddings, as a plain float.
                        score = util.cos_sim(cv_embedding, jd_embedding)[0][0].item()
                        if TopScore < score * 100:
                            TopScore = score * 100
                            CourseId = cv

                        print("DB Entry for Matching Results")
                        if MatchSkillsId == 0:
                            # NOTE(review): max()+1 is not safe against concurrent
                            # writers; a sequence would be -- confirm deployment.
                            cursor_obj.execute("select coalesce(max(skillmatchid),0) + 1 from courseskillmatch")
                            MatchSkillsId = cursor_obj.fetchone()[0]

                        cursor_obj.execute(
                            """INSERT INTO public.courseskillmatch(SkillMatchID, courseid, JDMasterID, MatchScore,isactive) VALUES (%s,%s,%s,%s,%s)""",
                            (MatchSkillsId, cv, jd, score, 1),
                        )
                        conn.commit()
                        MatchSkillsId = MatchSkillsId + 1
                        print(str(MatchSkillsId) + " " + "Updating in DB - JD {} CV {} ".format(jd, cv), score)

                print(CourseId)
                cursor_obj.execute("select filename from coursemaster where masterid = %s", (CourseId,))
                course_row = cursor_obj.fetchone()
                if course_row is None or course_row[0] is None:
                    # Nothing to report: no best course, or it has no filename.
                    print(CourseId)
                    return None
                MatchId = course_row[0].split('.')[0]
                print("------------------------Beta Results - " + MatchId)
                return MatchId
            finally:
                cursor_obj.close()
        finally:
            # Always release the connection, including on query errors.
            conn.close()

    @staticmethod
    def tuple_to_int(tup):
        """Combine a tuple of digits into one number, e.g. ``(1, 2, 3)`` -> 123.

        A one-element tuple returns its element unchanged; an empty tuple
        raises ``IndexError``.
        """
        total = tup[0]
        for digit in tup[1:]:
            total = total * 10 + digit
        return total