Vaibhav84 committed on
Commit
90c20ac
·
1 Parent(s): 9e0b38e

Db Connection added

Browse files
Files changed (1) hide show
  1. SkillExtract.py +245 -0
SkillExtract.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import date
2
+ import psycopg2
3
+ from psycopg2 import sql
4
+ import pandas as pd
5
+ import re
6
class SkillExtractor:
    """Extract skills from job descriptions and record them in PostgreSQL.

    Every method is a static method: none of them uses instance state, and
    they are invoked throughout this class as ``SkillExtractor.<name>(...)``.
    ``db_params`` is always a mapping of keyword arguments that is forwarded
    unchanged to ``psycopg2.connect``.
    """

    @staticmethod
    def _link_skill_to_jd(cursor, skill_id, jd_id):
        """Insert a (skill, JD) row into jdSkilldetails unless it already exists.

        Args:
            cursor: An open database cursor; the caller owns the transaction.
            skill_id: Value for the ``Skillid`` column.
            jd_id: Value for the ``jdMasterid`` column.

        Returns:
            True when a row was inserted, False when the pair was already there.
        """
        cursor.execute(
            "SELECT skillid FROM jdSkilldetails WHERE skillid IN (%s) and jdMasterid in (%s)",
            (skill_id, jd_id),
        )
        if cursor.fetchone() is not None:
            return False
        cursor.execute(
            "INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)",
            (skill_id, jd_id),
        )
        return True

    @staticmethod
    def _store_annotated_skill(cursor, skill_name, skill_type, score, jd_id):
        """Persist one annotated skill and link it to a job description.

        If ``skillmaster`` already has rows whose ``skillDetails`` equals
        ``skill_name``, each of them is linked to ``jd_id``. Otherwise a new
        ``SkillMaster`` row is inserted (weightage -1.0, active) and linked.

        Args:
            cursor: An open database cursor; the caller commits.
            skill_name: Skill name to look up / insert.
            skill_type: Skill type stored with a newly inserted skill.
            score: Match score, already rounded by the caller.
            jd_id: Job description id the skill belongs to.
        """
        cursor.execute(
            "SELECT skillid FROM skillmaster WHERE skillDetails IN (%s)",
            (skill_name,),
        )
        known_ids = [int(found[0]) for found in cursor.fetchall()]
        if known_ids:
            for known_id in known_ids:
                SkillExtractor._link_skill_to_jd(cursor, known_id, jd_id)
            return

        # Re-round after scaling: 0.57 * 100 is 56.99999999999999 in binary
        # floating point, and that artefact would otherwise be stored as text.
        stored_score = str(round(score * 100, 2))
        cursor.execute(
            """INSERT INTO SkillMaster (SkillDetails, SkillType, Weightage, IsActive, skill_score)
            VALUES (%s, %s, %s, %s, %s) RETURNING SkillID""",
            (skill_name, skill_type, -1.0, True, stored_score),
        )
        new_skill_id = cursor.fetchone()[0]
        cursor.execute(
            "INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)",
            (new_skill_id, jd_id),
        )

    @staticmethod
    def GetSkillId(skillname, jdmasterid, db_params):
        """Look up a skill id by name and link it to a job description.

        The lookup is case-insensitive (both sides are upper-cased). When the
        skill exists, a jdSkilldetails row for ``jdmasterid`` is inserted if
        it is not already present.

        Args:
            skillname: Skill name to look up in ``skillmaster``.
            jdmasterid: Job description id to link the skill to.
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            The skill id, or None when no skillmaster row matches (the
            previous implementation failed with a TypeError in that case).
        """
        conn = psycopg2.connect(**db_params)
        try:
            with conn.cursor() as cursor:
                cursor.execute(
                    "select skillid from skillmaster where upper(skilldetails) = (%s)",
                    (skillname.upper(),),
                )
                found = cursor.fetchone()
                if found is None:
                    return None
                generated_skill_id = found[0]
                SkillExtractor._link_skill_to_jd(cursor, generated_skill_id, jdmasterid)
            conn.commit()
            return generated_skill_id
        finally:
            # Always release the connection, including on errors.
            conn.close()

    @staticmethod
    def skill_Validate(df, skill):
        """Report whether ``skill`` is present in ``df['skilldetails']``.

        A single-word skill must equal an entry exactly; a multi-word skill
        only has to occur as a substring of an entry. The comparison is
        literal text (no regular expressions), so names such as ``C++`` or
        ``.NET`` are handled correctly, and ``df`` is not modified.

        Args:
            df: DataFrame with a ``skilldetails`` column. The skill is
                upper-cased before comparing, so entries are expected to be
                upper case already.
            skill: Candidate skill name in any case.

        Returns:
            1 when the skill is found, otherwise 0 (also for blank input).
        """
        skill = skill.strip().upper()
        words = skill.split()
        if not words:
            return 0
        known = df['skilldetails']
        if len(words) == 1:
            is_present = known.eq(skill).any()
        else:
            is_present = known.str.contains(skill, regex=False, na=False).any()
        return 1 if is_present else 0

    @staticmethod
    def getNewSkills(db_params):
        """Load the skillmaster rows whose weightage is -2.

        Args:
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            DataFrame with columns skillid, skilldetails, skilltype and
            skill_score; ``skilldetails`` is upper-cased.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = psycopg2.connect(**db_params)
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def extractWords(job_description, JdMasterid, db_params):
        """Find known skills among the individual words of a job description.

        The text is split on commas, spaces, semicolons, newlines and
        slashes; each non-empty token is checked with ``skill_Validate``
        against the frame from ``getNewSkills`` and, when valid, linked to
        the job description through ``GetSkillId``.

        Args:
            job_description: Raw job description text.
            JdMasterid: Job description id the skills are linked to.
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            List of the stripped tokens recognised as skills, in order of
            appearance (repeated tokens are repeated in the list).
        """
        job_roles = []
        # Parentheses are not delimiters; blank both kinds so "(Python)"
        # yields the token "Python".
        job_description = job_description.replace('(', ' ').replace(')', ' ')
        delimiters = ",", " ", " , ", ";", "\n", "/", "\\"
        regex_pattern = '|'.join(map(re.escape, delimiters))
        df = SkillExtractor.getNewSkills(db_params)

        validity = {}  # upper-cased token -> bool, so repeats cost no extra DB work
        for ds in re.split(regex_pattern, job_description):
            token = ds.strip()
            if not token:
                continue
            key = token.upper()
            if key not in validity:
                validity[key] = bool(SkillExtractor.skill_Validate(df, token))
                if validity[key]:
                    try:
                        linked_id = SkillExtractor.GetSkillId(token, JdMasterid, db_params)
                    except psycopg2.Error as error:
                        # Best effort: one failing token must not abort the
                        # rest, but the failure is reported, not hidden.
                        print("Skill could not be stored : " + token + " (" + str(error) + ")")
                    else:
                        if linked_id is not None:
                            print("Skills Identified* : " + token)
            if validity[key]:
                job_roles.append(token)
        return job_roles

    @staticmethod
    def SkillExtract(db_params, skill_extractor, JdID):
        """Extract and store the skills of one not-yet-processed job description.

        Reads the JDMaster row with id ``JdID`` whose ``isskillsextracted`` is
        0, stores the skills reported by ``skill_extractor.annotate`` and by
        ``extractWords``, then sets ``isskillsextracted`` to 1.

        Args:
            db_params: Keyword arguments for ``psycopg2.connect``.
            skill_extractor: Object providing ``annotate(text)`` (a dict with
                ``results.full_matches`` and ``results.ngram_scored`` lists)
                and a ``skills_db`` mapping of skill id to
                ``skill_name`` / ``skill_type``.
            JdID: Job description id, as int or str.

        Returns:
            The result of ``latestSkillDetails`` for the processed id, or
            None when no unprocessed job description matches ``JdID``.
        """
        print("Extracting Skills for the JD...")
        # The id is bound as a parameter instead of being concatenated into
        # the SQL text; passing it as text lets PostgreSQL coerce it.
        query = (
            "select jdmasterid,jobdescription,filename from JDMaster "
            "where isskillsextracted = 0 and jdmasterid = %s"
        )
        id_value = None
        conn = psycopg2.connect(**db_params)
        try:
            df = pd.read_sql_query(query, conn, params=(str(JdID),))
            if df.empty:
                return None
            print("Total JDs for Extractraction : " + str(len(df.index)))

            with conn.cursor() as cursor:
                for index, jd_row in df.iterrows():
                    id_value = jd_row['jdmasterid']
                    filename_jd = jd_row['filename']
                    description_value = jd_row['jobdescription']
                    print("Extracting Skills For ", filename_jd + " , Id : " + str(id_value) + " , Index " + str(index + 1))

                    annotations = skill_extractor.annotate(description_value)
                    matches = annotations['results']['full_matches'] + annotations['results']['ngram_scored']

                    seen_names = set()
                    for match in matches:
                        skill_entry = skill_extractor.skills_db[match['skill_id']]
                        # Drop any parenthesised qualifier from the skill name.
                        skill_name = skill_entry['skill_name'].split("(")[0].strip()
                        if skill_name in seen_names:
                            continue
                        seen_names.add(skill_name)
                        SkillExtractor._store_annotated_skill(
                            cursor,
                            skill_name,
                            skill_entry['skill_type'],
                            round(match['score'], 2),
                            id_value,
                        )
                        conn.commit()
                        print("Skill Identified : ", skill_name)

                    SkillExtractor.extractWords(description_value, id_value, db_params)
                    cursor.execute(
                        "update public.jdmaster set isskillsextracted = 1 where jdmasterid = (%s)",
                        (id_value,),
                    )
                    conn.commit()
                    print("Skills Updated for Skills Extraction for file ", filename_jd)
                    print("Total Skills : ", len(seen_names))
        finally:
            conn.close()
        return SkillExtractor.latestSkillDetails(id_value, db_params)

    @staticmethod
    def latestSkillDetails(jid, db_params):
        """Display the skills of the most recently extracted job description.

        Picks the jdmaster row with the highest ``jdmasterid`` among those
        with ``isskillsextracted = 1``.

        Args:
            jid: Job description id to compare the latest row against.
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            The string produced by ``display_skills`` when the latest id
            differs from ``jid``; None when it equals ``jid`` or when no
            extracted job description exists.
        """
        query = "select * from jdmaster where isskillsextracted=1 order by jdmasterid desc limit 1 "
        conn = psycopg2.connect(**db_params)
        try:
            df = pd.read_sql_query(query, conn)
        finally:
            conn.close()
        if df.empty:
            return None

        # Columns are read by position (0, 2, 3) from "select *".
        # NOTE(review): presumably id, file name and upload date - confirm
        # against the jdmaster schema.
        fileId = df.iat[0, 0]
        filename = df.iat[0, 2]
        upload = df.iat[0, 3]

        # NOTE(review): skills are shown only when the latest id differs from
        # jid, as in the original code - confirm this is the intended test.
        if fileId == jid:
            return None
        print("Skill Details for File : " + str(filename) + " , ID " + str(fileId) + " , Uploaded on " + str(upload))
        return SkillExtractor.display_skills(fileId, db_params)

    @staticmethod
    def tuple_to_int(tup):
        """Combine a sequence of digits into one integer, e.g. (1, 2, 3) -> 123.

        Args:
            tup: Non-empty sequence; the first element is the most significant.

        Returns:
            The combined value; a one-element sequence returns that element.

        Raises:
            IndexError: If ``tup`` is empty.
        """
        if not tup:
            raise IndexError("tuple_to_int() requires a non-empty sequence")
        # Left-to-right accumulation gives the same result as the recursive
        # form without recursion depth limits or repeated slicing.
        value = tup[0]
        for digit in tup[1:]:
            value = value * 10 + digit
        return value

    @staticmethod
    def skill_check(dbQuery, db_params, params=None):
        """Run a query and join its ``skillname`` column into one string.

        Args:
            dbQuery: SQL text whose result has a ``skillname`` column.
            db_params: Keyword arguments for ``psycopg2.connect``.
            params: Optional values bound to ``%s`` placeholders in ``dbQuery``.

        Returns:
            The skill names separated by ", " (empty string for no rows).
        """
        conn = psycopg2.connect(**db_params)
        try:
            df = pd.read_sql_query(dbQuery, conn, params=params)
        finally:
            conn.close()
        # NULL names are skipped; they cannot be joined into the text.
        return ', '.join(df['skillname'].dropna().astype(str))

    @staticmethod
    def display_skills(id, db_params=None):
        """Print and return the skills of one job description, grouped.

        Four groups are read from SkillDetails: hard skills with score > 99,
        soft skills with score > 50, soft skills with score < 50 and hard
        skills with score < 99.
        NOTE(review): scores of exactly 50 / 99 fall into no group - confirm
        whether that is intended.

        Args:
            id: Job description id (name kept for caller compatibility even
                though it shadows the builtin).
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            The four comma-separated groups joined with "@", in the order
            listed above.

        Raises:
            TypeError: If ``db_params`` is not supplied; the queries cannot
                run without connection settings.
        """
        if db_params is None:
            raise TypeError("display_skills() requires db_params to query the database")
        jd = str(id)  # bound as text; also keeps numpy integers adaptable

        query = "select skillname from SkillDetails where id = %s and skillscore > 99 and skilltype = 'Hard Skill'"
        RequiredSkills_Hard = SkillExtractor.skill_check(query, db_params, (jd,))

        query = "select skillname from SkillDetails where id = %s and skillscore > 50 and skilltype = 'Soft Skill'"
        RequiredSkills_Soft = SkillExtractor.skill_check(query, db_params, (jd,))

        query = "select skillname from SkillDetails where id = %s and skillscore < 50 and skilltype = 'Soft Skill'"
        RequiredSkills_G1 = SkillExtractor.skill_check(query, db_params, (jd,))

        query = "select skillname from SkillDetails where id = %s and skillscore < 99 and skilltype = 'Hard Skill'"
        RequiredSkills_G2 = SkillExtractor.skill_check(query, db_params, (jd,))

        print('')
        print("Required Skills : " + RequiredSkills_Hard)
        print('')
        print("Required Soft Skills : " + RequiredSkills_Soft)
        print('')
        print("Good to have Skills : " + RequiredSkills_G1 + " " + RequiredSkills_G2)
        return RequiredSkills_Hard + "@" + RequiredSkills_Soft + "@" + RequiredSkills_G1 + "@" + RequiredSkills_G2