Vaibhav84 committed on
Commit
eabd638
·
1 Parent(s): 2c0502b
Files changed (2) hide show
  1. SkillExtract.py +72 -31
  2. app.py +1 -1
SkillExtract.py CHANGED
@@ -77,7 +77,7 @@ class SkillExtractorDetails:
77
  except Exception as error:
78
  test = 1
79
  return job_roles
80
- def SkillExtract(db_params,skill_extractor,JdID):
81
  print("Extracting Skills for the JD...")
82
  # Connect to the PostgreSQL database
83
  conn = psycopg2.connect(**db_params)
@@ -87,9 +87,12 @@ class SkillExtractorDetails:
87
  # Retrieve "id" and "description" columns from the table
88
  #query = sql.SQL("select jdmasterid,jobdescription from JDMaster where isskillsextracted in (0)")
89
  query = "select jdmasterid,jobdescription,filename from JDMaster where isskillsextracted = 0 and jdmasterid ="+ jd_id
90
-
91
  # Use Pandas to read the data into a DataFrame
92
- df = pd.read_sql_query(query, conn)
 
 
 
93
 
94
  # Print the DataFrame (for demonstration purposes)
95
  #print(df)
@@ -103,18 +106,22 @@ class SkillExtractorDetails:
103
  OldSkillCount = 0
104
  NewSkillCount = 0
105
  if(len(df.index) > 0):
106
- print("Total JDs for Extractraction : " + str(len(df.index)))
107
  for index, row in df.iterrows():
108
  # Access individual columns using column names
109
- id_value = row['jdmasterid']
 
 
 
 
 
 
110
  filename_jd = row['filename']
111
  OldSkillCount = 0
112
  NewSkillCount = 0
113
  skill_score = 0.0
114
  print("Extracting Skills For ", filename_jd + " , Id : " + str(id_value) + " , Index " + str(index + 1))
115
 
116
- description_value = row['jobdescription']
117
- #print(description_value)
118
 
119
  annotations = skill_extractor.annotate(description_value)
120
  matches = annotations['results']['full_matches']+annotations['results']['ngram_scored']
@@ -145,18 +152,33 @@ class SkillExtractorDetails:
145
  #print("Skill Already in SkillMaster")
146
  OldSkillCount = OldSkillCount + 1
147
  isOld = "Yes"
148
- query = "SELECT skillid FROM jdSkilldetails WHERE skillid IN (%s) and jdMasterid in (%s)"
 
 
 
149
  params = (row_as_int[0],id_value,)
150
  cursor.execute(query, params)
151
  if cursor.rowcount > 0:
152
  weightage = -1.0
153
  #print("Skill Already in SkillMaster and JDSkillDetails")
154
  else:
155
- Skillid = row_as_int[0]
156
- jdMasterid = id_value
157
- insert_query = sql.SQL("""INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)""")
158
- cursor.execute(insert_query, (Skillid, jdMasterid))
159
- conn.commit()
 
 
 
 
 
 
 
 
 
 
 
 
160
  #print("Skill Already in SkillMaster and Inserted in JDSkillDetails")
161
  #print(row_as_int)
162
  else:
@@ -175,23 +197,37 @@ class SkillExtractorDetails:
175
  generated_skill_id = cursor.fetchone()[0]
176
  Skillid = generated_skill_id
177
  jdMasterid = id_value
178
- insert_query = sql.SQL("""INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)""")
179
- cursor.execute(insert_query, (Skillid, jdMasterid))
180
- conn.commit()
 
 
 
 
 
 
 
 
 
181
  print("Skill Identified : ", skill_name)
182
  #print("Skill inserted in SkillMaster and Inserted in JDSkillDetails")
183
- SkillExtractorDetails.extractWords(description_value,id_value,db_params)
184
- query = "update public.jdmaster set isskillsextracted = 1 where jdmasterid = (%s)"
 
 
 
 
 
185
 
186
  params = (id_value,)
187
  cursor.execute(query, params)
188
  conn.commit()
189
  print("Skills Updated for Skills Extraction for file ", filename_jd)
190
  print("Total Skills : ", len(skills_list))
191
- return SkillExtractorDetails.latestSkillDetails(id_value,db_params)
192
- def latestSkillDetails(jid,db_params):
193
  data = ""
194
- data = SkillExtractorDetails.display_skills(jid,db_params)
195
  # jid = df.iat[0,0]
196
  return data
197
  def tuple_to_int(tup):
@@ -201,7 +237,7 @@ class SkillExtractorDetails:
201
  return tup[0] * (10 ** (len(tup) - 1)) + SkillExtractorDetails.tuple_to_int(tup[1:])
202
 
203
 
204
- def skill_check(dbQuery,db_params):
205
  conn = psycopg2.connect(**db_params)
206
  cursor = conn.cursor()
207
  df = pd.read_sql_query(dbQuery, conn)
@@ -213,19 +249,24 @@ class SkillExtractorDetails:
213
 
214
  Required_Skills = Required_Skills[2:]
215
  return Required_Skills
216
- def display_skills(id, db_params):
217
  jd=str(id)
218
- query = "select skillname from SkillDetails where id = "+ jd +" and skillscore > 99 and skilltype = 'Hard Skill'"
219
- RequiredSkills_Hard = SkillExtractorDetails.skill_check(query,db_params)
 
 
 
 
 
220
 
221
- query = "select skillname from SkillDetails where id = "+ jd +" and skillscore > 50 and skilltype = 'Soft Skill'"
222
- RequiredSkills_Soft = SkillExtractorDetails.skill_check(query,db_params)
223
 
224
- query = "select skillname from SkillDetails where id = "+ jd +" and skillscore < 50 and skilltype = 'Soft Skill'"
225
- RequiredSkills_G1 = SkillExtractorDetails.skill_check(query,db_params)
226
 
227
- query = "select skillname from SkillDetails where id = "+ jd +" and skillscore < 99 and skilltype = 'Hard Skill'"
228
- RequiredSkills_G2 = SkillExtractorDetails.skill_check(query,db_params)
229
 
230
  print('')
231
  print("Required Skills : " + RequiredSkills_Hard)
 
77
  except Exception as error:
78
  test = 1
79
  return job_roles
80
+ def SkillExtract(db_params,skill_extractor,JdID, IsJD):
81
  print("Extracting Skills for the JD...")
82
  # Connect to the PostgreSQL database
83
  conn = psycopg2.connect(**db_params)
 
87
  # Retrieve "id" and "description" columns from the table
88
  #query = sql.SQL("select jdmasterid,jobdescription from JDMaster where isskillsextracted in (0)")
89
  query = "select jdmasterid,jobdescription,filename from JDMaster where isskillsextracted = 0 and jdmasterid ="+ jd_id
90
+ CVquery = "select masterid,description,filename from CourseMaster where isskillsextracted = 0 and masterid ="+ jd_id
91
  # Use Pandas to read the data into a DataFrame
92
+ if(IsJD):
93
+ df = pd.read_sql_query(query, conn)
94
+ else:
95
+ df = pd.read_sql_query(query, conn)
96
 
97
  # Print the DataFrame (for demonstration purposes)
98
  #print(df)
 
106
  OldSkillCount = 0
107
  NewSkillCount = 0
108
  if(len(df.index) > 0):
109
+ print("Total IDs for Extractraction : " + str(len(df.index)))
110
  for index, row in df.iterrows():
111
  # Access individual columns using column names
112
+ if(IsJD):
113
+ id_value = row['jdmasterid']
114
+ description_value = row['jobdescription']
115
+ else:
116
+ id_value = row['masterid']
117
+ description_value = row['description']
118
+
119
  filename_jd = row['filename']
120
  OldSkillCount = 0
121
  NewSkillCount = 0
122
  skill_score = 0.0
123
  print("Extracting Skills For ", filename_jd + " , Id : " + str(id_value) + " , Index " + str(index + 1))
124
 
 
 
125
 
126
  annotations = skill_extractor.annotate(description_value)
127
  matches = annotations['results']['full_matches']+annotations['results']['ngram_scored']
 
152
  #print("Skill Already in SkillMaster")
153
  OldSkillCount = OldSkillCount + 1
154
  isOld = "Yes"
155
+ if(IsJD):
156
+ query = "SELECT skillid FROM jdSkilldetails WHERE skillid IN (%s) and jdMasterid in (%s)"
157
+ else:
158
+ query = "SELECT skillid FROM CourseSkilldetails WHERE skillid IN (%s) and Masterid in (%s)"
159
  params = (row_as_int[0],id_value,)
160
  cursor.execute(query, params)
161
  if cursor.rowcount > 0:
162
  weightage = -1.0
163
  #print("Skill Already in SkillMaster and JDSkillDetails")
164
  else:
165
+ if(IsJD):
166
+ Skillid = row_as_int[0]
167
+ jdMasterid = id_value
168
+ insert_query = sql.SQL("""INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)""")
169
+ cursor.execute(insert_query, (Skillid, jdMasterid))
170
+ conn.commit()
171
+ else:
172
+ query = "Select max(skilldetailsid) from courseskilldetails"
173
+ df = pd.read_sql_query(query, conn)
174
+ CourseID = df.iat[0,0] + 1
175
+
176
+ Skillid = row_as_int[0]
177
+ jdMasterid = id_value
178
+ insert_query = sql.SQL("""INSERT INTO Courseskilldetails (skilldetailsid, Skillid, Masterid) VALUES (%s, %s, %s)""")
179
+ cursor.execute(insert_query, (CourseID, Skillid, jdMasterid))
180
+ conn.commit()
181
+
182
  #print("Skill Already in SkillMaster and Inserted in JDSkillDetails")
183
  #print(row_as_int)
184
  else:
 
197
  generated_skill_id = cursor.fetchone()[0]
198
  Skillid = generated_skill_id
199
  jdMasterid = id_value
200
+ if(IsJD):
201
+ insert_query = sql.SQL("""INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)""")
202
+ cursor.execute(insert_query, (Skillid, jdMasterid))
203
+ conn.commit()
204
+ else:
205
+ query = "Select max(skilldetailsid) from courseskilldetails"
206
+ df = pd.read_sql_query(query, conn)
207
+ CourseID = df.iat[0,0] + 1
208
+
209
+ insert_query = sql.SQL("""INSERT INTO CourseSkilldetails (skilldetailsid,Skillid, Masterid) VALUES (%s, %s, %s)""")
210
+ cursor.execute(insert_query, (CourseID,Skillid, jdMasterid))
211
+ conn.commit()
212
  print("Skill Identified : ", skill_name)
213
  #print("Skill inserted in SkillMaster and Inserted in JDSkillDetails")
214
+
215
+
216
+ if(IsJD):
217
+ SkillExtractorDetails.extractWords(description_value,id_value,db_params)
218
+ query = "update public.jdmaster set isskillsextracted = 1 where jdmasterid = (%s)"
219
+ else:
220
+ query = "update public.coursemaster set isskillsextracted = 1 where masterid = (%s)"
221
 
222
  params = (id_value,)
223
  cursor.execute(query, params)
224
  conn.commit()
225
  print("Skills Updated for Skills Extraction for file ", filename_jd)
226
  print("Total Skills : ", len(skills_list))
227
+ return SkillExtractorDetails.latestSkillDetails(id_value,db_params,IsJD)
228
+ def latestSkillDetails(jid,db_params,IsJD):
229
  data = ""
230
+ data = SkillExtractorDetails.display_skills(jid,db_params,IsJD)
231
  # jid = df.iat[0,0]
232
  return data
233
  def tuple_to_int(tup):
 
237
  return tup[0] * (10 ** (len(tup) - 1)) + SkillExtractorDetails.tuple_to_int(tup[1:])
238
 
239
 
240
+ def skill_check(dbQuery,db_params,IsJD):
241
  conn = psycopg2.connect(**db_params)
242
  cursor = conn.cursor()
243
  df = pd.read_sql_query(dbQuery, conn)
 
249
 
250
  Required_Skills = Required_Skills[2:]
251
  return Required_Skills
252
+ def display_skills(id, db_params,IsJD):
253
  jd=str(id)
254
+ tableName = 'CVSkillDetails'
255
+ if(IsJD):
256
+ tableName = 'SkillDetails'
257
+
258
+
259
+ query = "select skillname from " + tableName +" where id = "+ jd +" and skillscore > 99 and skilltype = 'Hard Skill'"
260
+ RequiredSkills_Hard = SkillExtractorDetails.skill_check(query,db_params,IsJD)
261
 
262
+ query = "select skillname from " + tableName +" where id = "+ jd +" and skillscore > 50 and skilltype = 'Soft Skill'"
263
+ RequiredSkills_Soft = SkillExtractorDetails.skill_check(query,db_params,IsJD)
264
 
265
+ query = "select skillname from " + tableName +" where id = "+ jd +" and skillscore < 50 and skilltype = 'Soft Skill'"
266
+ RequiredSkills_G1 = SkillExtractorDetails.skill_check(query,db_params,IsJD)
267
 
268
+ query = "select skillname from " + tableName +" where id = "+ jd +" and skillscore < 99 and skilltype = 'Hard Skill'"
269
+ RequiredSkills_G2 = SkillExtractorDetails.skill_check(query,db_params,IsJD)
270
 
271
  print('')
272
  print("Required Skills : " + RequiredSkills_Hard)
app.py CHANGED
@@ -50,7 +50,7 @@ def parse_csv(df):
50
  def UploadJobDescription(file: bytes = File(...), FileName: str = "sample.pdf"):
51
  text= ExtractContentFromFile.ExtractDataFromFile(FileName,file)
52
  returnID = UploadOpenFile.uploadFile(text,FileName,db_params,True)
53
- returnSkills = SkillExtractorDetails.SkillExtract(db_params,skill_extractor,returnID)
54
  details = returnSkills.split('@')
55
  data = {'Data':['Required Skills', 'Soft Skills', 'Good to have Skills'], 'Values':[details[0], details[1], details[2]]}
56
  df = pd.DataFrame(data)
 
50
  def UploadJobDescription(file: bytes = File(...), FileName: str = "sample.pdf"):
51
  text= ExtractContentFromFile.ExtractDataFromFile(FileName,file)
52
  returnID = UploadOpenFile.uploadFile(text,FileName,db_params,True)
53
+ returnSkills = SkillExtractorDetails.SkillExtract(db_params,skill_extractor,returnID,True)
54
  details = returnSkills.split('@')
55
  data = {'Data':['Required Skills', 'Soft Skills', 'Good to have Skills'], 'Values':[details[0], details[1], details[2]]}
56
  df = pd.DataFrame(data)