import logging
import re
from contextlib import closing

import pandas as pd
import psycopg2
from psycopg2 import sql
class SkillExtractorDetails:
    """Stateless helpers that extract skills from descriptions and store them.

    Every method is a static method and is called through the class
    (``SkillExtractorDetails.method(...)``).  Methods that talk to the
    database open their own connection with ``psycopg2.connect(**db_params)``
    and close it before returning, including when an error is raised.
    """

    # Single-character delimiters that separate candidate skill tokens.
    _TOKEN_SPLIT = re.compile(r"[, ;\n/\\]")
    _LOGGER = logging.getLogger(__name__)

    @staticmethod
    def GetSkillId(skillname, jdmasterid, db_params):
        """Return the id of ``skillname`` and link it to ``jdmasterid``.

        The skill is looked up case-insensitively in ``skillmaster``.  A row
        is inserted into ``jdSkilldetails`` only when that (skill, master id)
        pair is not stored yet.

        Args:
            skillname: Skill text to look up.
            jdmasterid: Master id the skill should be linked to.
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            The ``skillid`` of the matching ``skillmaster`` row.

        Raises:
            LookupError: If no ``skillmaster`` row matches ``skillname``.
        """
        with closing(psycopg2.connect(**db_params)) as conn, conn.cursor() as cursor:
            cursor.execute(
                "select skillid from skillmaster where upper(skilldetails) = %s",
                (skillname.upper(),),
            )
            row = cursor.fetchone()
            if row is None:
                # Fail with a clear message instead of subscripting None.
                raise LookupError(f"skill {skillname!r} not found in skillmaster")
            generated_skill_id = row[0]

            cursor.execute(
                "SELECT skillid FROM jdSkilldetails WHERE skillid = %s AND jdMasterid = %s",
                (generated_skill_id, jdmasterid),
            )
            if cursor.fetchone() is None:
                cursor.execute(
                    "INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)",
                    (generated_skill_id, jdmasterid),
                )
                conn.commit()
        return generated_skill_id

    @staticmethod
    def skill_Validate(df, skill):
        """Return 1 if ``skill`` is present in ``df['skilldetails']``, else 0.

        The comparison is done on the upper-cased ``skill`` and treats it as
        literal text (no regular-expression interpretation).  A single-word
        skill must equal an entry exactly; a multi-word skill only has to
        occur as a substring of an entry.  Blank skills never match and null
        entries in the column are ignored.  ``df`` is not modified.

        Args:
            df: DataFrame with a ``skilldetails`` column; entries are assumed
                to be upper-cased already, as ``getNewSkills`` returns them.
            skill: Candidate skill text.

        Returns:
            ``1`` when the skill is found, ``0`` otherwise.
        """
        wanted = skill.upper()
        if not wanted.strip():
            return 0

        names = df['skilldetails'].dropna()
        if names.empty:
            return 0

        if len(wanted.split()) == 1:
            found = (names == wanted).any()
        else:
            found = names.str.contains(wanted, regex=False).any()
        return 1 if found else 0

    @staticmethod
    def getNewSkills(db_params):
        """Load the ``skillmaster`` rows whose ``weightage`` is -2.

        Args:
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            DataFrame with columns ``skillid``, ``skilldetails``,
            ``skilltype`` and ``skill_score``; ``skilldetails`` is upper-cased.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        with closing(psycopg2.connect(**db_params)) as conn:
            df_skill_master = pd.read_sql_query(query, conn)
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def extractWords(job_description, JdMasterid, db_params):
        """Find known skills among the tokens of ``job_description``.

        The text is split on comma, space, semicolon, newline, slash and
        backslash.  Each non-blank token is checked with ``skill_Validate``
        against the skills returned by ``getNewSkills``; matching tokens are
        collected and linked to ``JdMasterid`` through ``GetSkillId``.

        Linking is best effort: a failure for one token is logged as a
        warning and does not stop the scan.  The token stays in the result
        because it was already validated.

        Args:
            job_description: Free text to scan.
            JdMasterid: Master id the found skills are linked to.
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            List of the matching tokens, in order of appearance.
        """
        # NOTE(review): only ')' is blanked out, so '(' stays attached to the
        # token that follows it - confirm whether that is intended.
        text = job_description.replace(')', ' ')
        known_skills = SkillExtractorDetails.getNewSkills(db_params)

        job_roles = []
        for token in SkillExtractorDetails._TOKEN_SPLIT.split(text):
            candidate = token.strip()
            if not candidate:
                # Consecutive delimiters produce empty tokens; never a skill.
                continue
            try:
                if SkillExtractorDetails.skill_Validate(known_skills, candidate):
                    job_roles.append(token)
                    SkillExtractorDetails.GetSkillId(candidate, JdMasterid, db_params)
                    print("Skills Identified* : " + token)
            except Exception as error:
                SkillExtractorDetails._LOGGER.warning(
                    "Could not record skill %r for id %s: %s",
                    candidate, JdMasterid, error,
                )
        return job_roles

    @staticmethod
    def SkillExtract(db_params, skill_extractor, JdID, IsJD):
        """Annotate the pending description for ``JdID`` and store new skills.

        The row is read from ``JDMaster`` (``IsJD`` true) or ``CourseMaster``
        (``IsJD`` false) and only when its ``isskillsextracted`` is 0.  Every
        distinct skill reported by ``skill_extractor`` that is missing from
        ``skillmaster`` is inserted with weightage -1.0 and its score scaled
        to a 0-100 range.

        Args:
            db_params: Keyword arguments for ``psycopg2.connect``.
            skill_extractor: Object providing ``annotate(text)``, whose result
                has ``['results']['full_matches']`` and
                ``['results']['ngram_scored']`` lists of dicts with
                ``skill_id`` and ``score``, plus a ``skills_db`` mapping from
                skill id to a dict with ``skill_name`` and ``skill_type``.
            JdID: Id of the job description or course to process.
            IsJD: True for a job description, False for a course.

        Returns:
            The summary string produced by ``latestSkillDetails`` for
            ``JdID``.  It is returned even when no row was pending extraction.
        """
        print("Extracting Skills for the JD...")

        if IsJD:
            query = ("select jdmasterid,jobdescription,filename from JDMaster "
                     "where isskillsextracted = 0 and jdmasterid = %s")
            id_column, text_column = 'jdmasterid', 'jobdescription'
        else:
            query = ("select masterid,description,filename from CourseMaster "
                     "where isskillsextracted = 0 and masterid = %s")
            id_column, text_column = 'masterid', 'description'

        insert_query = (
            "INSERT INTO SkillMaster (SkillDetails, SkillType, Weightage, IsActive, skill_score) "
            "VALUES (%s, %s, %s, %s, %s)"
        )

        with closing(psycopg2.connect(**db_params)) as conn, conn.cursor() as cursor:
            # The id is bound as a parameter instead of being concatenated
            # into the SQL text.
            df = pd.read_sql_query(query, conn, params=(str(JdID),))

            if len(df.index) > 0:
                print("Total IDs for Extractraction : " + str(len(df.index)))

            for position, (_, row) in enumerate(df.iterrows(), start=1):
                id_value = row[id_column]
                description_value = row[text_column]
                filename_jd = row['filename']
                print("Extracting Skills For ", filename_jd + " , Id : " + str(id_value) + " , Index " + str(position))

                results = skill_extractor.annotate(description_value)['results']
                matches = results['full_matches'] + results['ngram_scored']

                seen_skills = set()
                for result in matches:
                    entry = skill_extractor.skills_db[result['skill_id']]
                    # Drop a trailing "(...)" qualifier from the skill name.
                    skill_name = entry['skill_name'].split("(")[0].strip()
                    if skill_name in seen_skills:
                        continue
                    seen_skills.add(skill_name)

                    cursor.execute(
                        "SELECT skillid FROM skillmaster WHERE skillDetails = %s",
                        (skill_name,),
                    )
                    if cursor.fetchone() is not None:
                        print('Skill in DB')
                    else:
                        # Re-round after scaling so the stored text carries no
                        # float artifacts such as 56.99999999999999.
                        skill_score = round(round(result['score'], 2) * 100, 2)
                        cursor.execute(
                            insert_query,
                            (skill_name, entry['skill_type'], -1.0, True, str(skill_score)),
                        )
                        conn.commit()

                    # NOTE(review): printed for every distinct skill, new or
                    # already stored - confirm this is the intended scope.
                    print("Skill Identified : ", skill_name)

                print("Skills Updated for Skills Extraction for file ", filename_jd)
                print("Total Skills : ", len(seen_skills))

        # The query filters on JdID, so any processed row has this same id.
        return SkillExtractorDetails.latestSkillDetails(JdID, db_params, IsJD)

    @staticmethod
    def latestSkillDetails(jid, db_params, IsJD):
        """Return the skill summary for ``jid``; see ``display_skills``."""
        return SkillExtractorDetails.display_skills(jid, db_params, IsJD)

    @staticmethod
    def tuple_to_int(tup):
        """Combine a tuple of numbers into one value, most significant first.

        ``(1, 2, 3)`` becomes ``123``.  A one-element tuple returns its
        element unchanged, whatever its type.

        Raises:
            IndexError: If ``tup`` is empty.
        """
        if len(tup) == 1:
            return tup[0]
        total = tup[0]
        for digit in tup[1:]:
            total = total * 10 + digit
        return total

    @staticmethod
    def skill_check(dbQuery, db_params, IsJD, params=None):
        """Run ``dbQuery`` and join its ``skillname`` column with ``", "``.

        Args:
            dbQuery: SQL text whose result contains a ``skillname`` column.
            db_params: Keyword arguments for ``psycopg2.connect``.
            IsJD: Unused; kept so existing callers keep working.
            params: Optional values bound to ``%s`` placeholders in
                ``dbQuery``.

        Returns:
            The skill names separated by ``", "``, or ``""`` when the query
            returns no rows.
        """
        with closing(psycopg2.connect(**db_params)) as conn:
            df = pd.read_sql_query(dbQuery, conn, params=params)
        return ', '.join(df['skillname'].tolist())

    @staticmethod
    def display_skills(id, db_params, IsJD):
        """Print and return the required and good-to-have skills for ``id``.

        Skills are read from ``SkillDetails`` (``IsJD`` true) or
        ``CVSkillDetails`` (``IsJD`` false) and grouped as follows:

        * required hard skills: ``Hard Skill`` with score above 99
        * required soft skills: ``Soft Skill`` with score above 50
        * good to have: ``Soft Skill`` with score up to 50, followed by
          ``Hard Skill`` with score up to 99

        The boundary scores 50 and 99 count as good to have, so no skill is
        left out of every group.

        Args:
            id: Id of the job description or course.
            db_params: Keyword arguments for ``psycopg2.connect``.
            IsJD: True for a job description, False for a course.

        Returns:
            ``"<hard>@<soft>@<good soft> <good hard>"``, each part a
            ``", "``-separated list of skill names.
        """
        # The table name comes from this fixed pair, never from caller input.
        tableName = 'SkillDetails' if IsJD else 'CVSkillDetails'

        def fetch(comparison, threshold, skill_type):
            # `comparison` is one of the literal operators used below.
            query = (
                "select skillname from " + tableName
                + " where id = %s and skillscore " + comparison
                + " %s and skilltype = %s"
            )
            return SkillExtractorDetails.skill_check(
                query, db_params, IsJD, params=(str(id), threshold, skill_type)
            )

        RequiredSkills_Hard = fetch(">", 99, 'Hard Skill')
        RequiredSkills_Soft = fetch(">", 50, 'Soft Skill')
        RequiredSkills_G1 = fetch("<=", 50, 'Soft Skill')
        RequiredSkills_G2 = fetch("<=", 99, 'Hard Skill')

        print('')
        print("Required Skills : " + RequiredSkills_Hard)
        print('')
        print("Required Soft Skills : " + RequiredSkills_Soft)
        print('')
        print("Good to have Skills : " + RequiredSkills_G1 + " " + RequiredSkills_G2)
        return RequiredSkills_Hard + "@" + RequiredSkills_Soft + "@" + RequiredSkills_G1 + " " + RequiredSkills_G2