File size: 9,805 Bytes
90c20ac 4aba3cf 90c20ac 6329efe 90c20ac 6329efe 90c20ac 6329efe 90c20ac eabd638 90c20ac 5b1173d 90c20ac 950345a eabd638 90c20ac eabd638 957a3f6 90c20ac eabd638 90c20ac eabd638 90c20ac 163e27f 90c20ac 7b81380 90c20ac eabd638 7b81380 90c20ac eabd638 57731b0 eabd638 11f68ea 90c20ac 6329efe 90c20ac eabd638 90c20ac eabd638 90c20ac eabd638 90c20ac eabd638 90c20ac eabd638 90c20ac eabd638 90c20ac 73a90ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
import psycopg2
from psycopg2 import sql
import pandas as pd
import re
class SkillExtractorDetails:
    """Static helpers that extract skills from descriptions and persist them.

    Every method opens its own PostgreSQL connection from ``db_params`` (the
    keyword arguments handed to ``psycopg2.connect``) and closes it before
    returning, including when an error is raised.
    """

    @staticmethod
    def GetSkillId(skillname, jdmasterid, db_params):
        """Look up a skill id and link it to a JD if not linked already.

        Args:
            skillname: Skill name; matched case-insensitively against
                ``skillmaster.skilldetails``.
            jdmasterid: Id stored in ``jdSkilldetails.jdMasterid``.
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            The ``skillid`` of the matching ``skillmaster`` row.

        Raises:
            LookupError: If no ``skillmaster`` row matches ``skillname``.
        """
        conn = psycopg2.connect(**db_params)
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(
                    "select skillid from skillmaster where upper(skilldetails) = (%s)",
                    (skillname.upper(),),
                )
                row = cursor.fetchone()
                if row is None:
                    # Fail with a clear message instead of subscripting None.
                    raise LookupError(
                        "No skillmaster entry for skill " + repr(skillname)
                    )
                generated_skill_id = row[0]

                # Only insert the link when it is not already present.
                cursor.execute(
                    "SELECT skillid FROM jdSkilldetails WHERE skillid IN (%s) and jdMasterid in (%s)",
                    (generated_skill_id, jdmasterid),
                )
                if cursor.fetchone() is None:
                    cursor.execute(
                        "INSERT INTO jdSkilldetails (Skillid, jdMasterid) VALUES (%s, %s)",
                        (generated_skill_id, jdmasterid),
                    )
                    conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
        return generated_skill_id

    @staticmethod
    def skill_Validate(df, skill):
        """Tell whether ``skill`` appears in the ``skilldetails`` column of ``df``.

        A single-word skill (or an empty/very short blank one) must equal a
        whole entry; anything else only needs to occur as a substring of an
        entry. The skill is upper-cased first, so ``df['skilldetails']`` is
        expected to hold upper-case text already.

        Args:
            df: DataFrame with a ``skilldetails`` column. It is not modified.
            skill: Candidate skill text.

        Returns:
            1 if the skill is found, otherwise 0.
        """
        skill = skill.upper()
        words = skill.split()
        known_skills = df['skilldetails']
        if len(words) == 1 or (not words and len(skill) < 3):
            # Literal comparison: names such as "C++" or ".NET" contain regex
            # metacharacters and must not be interpreted as patterns.
            found = (known_skills == skill).any()
        else:
            # regex=False for the same reason; na=False ignores NULL entries.
            found = known_skills.str.contains(skill, regex=False, na=False).any()
        return 1 if found else 0

    @staticmethod
    def getNewSkills(db_params):
        """Load the ``skillmaster`` rows whose ``weightage`` is -2.

        Args:
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            DataFrame with columns ``skillid``, ``skilldetails`` (upper-cased),
            ``skilltype`` and ``skill_score``.
        """
        query = "select skillid,skilldetails,skilltype,skill_score from skillmaster where weightage = -2"
        conn = psycopg2.connect(**db_params)
        try:
            df_skill_master = pd.read_sql_query(query, conn)
        finally:
            conn.close()
        df_skill_master['skilldetails'] = df_skill_master['skilldetails'].str.upper()
        return df_skill_master

    @staticmethod
    def extractWords(job_description, JdMasterid, db_params):
        """Split a description into tokens and record those that are known skills.

        Tokens are validated with ``skill_Validate`` against the frame from
        ``getNewSkills``; each valid one is linked to ``JdMasterid`` through
        ``GetSkillId``. A failure on one token is reported and does not stop
        the remaining tokens from being processed.

        Args:
            job_description: Free text to scan. ``None`` or ``""`` yields ``[]``.
            JdMasterid: Id passed on to ``GetSkillId``.
            db_params: Keyword arguments for ``psycopg2.connect``.

        Returns:
            List of the tokens accepted as skills, in order of appearance.
        """
        job_roles = []
        if not job_description:
            return job_roles

        job_description = job_description.replace(')', ' ')
        delimiters = ",", " ", " , ", ";", "\n", "/", "\\"
        splitter = re.compile('|'.join(map(re.escape, delimiters)))
        df = SkillExtractorDetails.getNewSkills(db_params)

        for ds in splitter.split(job_description):
            token = ds.strip()
            if not token:
                # Consecutive delimiters produce empty tokens; nothing to check.
                continue
            try:
                if SkillExtractorDetails.skill_Validate(df, token):
                    job_roles.append(ds)
                    SkillExtractorDetails.GetSkillId(token, JdMasterid, db_params)
                    print("Skills Identified* : " + ds)
            except Exception as error:
                # Best effort: keep going, but do not hide the failure.
                print("Skill lookup failed for " + repr(token) + " : " + str(error))
        return job_roles

    @staticmethod
    def SkillExtract(db_params, skill_extractor, JdID, IsJD):
        """Annotate one pending JD/course description and store unseen skills.

        The row is read from ``JDMaster`` (``IsJD`` truthy) or ``CourseMaster``
        where ``isskillsextracted = 0``. Every distinct skill reported by
        ``skill_extractor`` that is missing from ``skillmaster`` is inserted
        with weightage -1.0.

        Args:
            db_params: Keyword arguments for ``psycopg2.connect``.
            skill_extractor: Object providing ``annotate(text)`` and a
                ``skills_db`` mapping keyed by skill id (presumably a skillNer
                ``SkillExtractor`` -- TODO confirm).
            JdID: Numeric id of the row to process.
            IsJD: Truthy for ``JDMaster``, falsy for ``CourseMaster``.

        Returns:
            The string built by ``display_skills`` for the processed id (or
            for ``JdID`` when no pending row was found).
        """
        print("Extracting Skills for the JD...")
        if IsJD:
            id_column, text_column = 'jdmasterid', 'jobdescription'
            query = ("select jdmasterid,jobdescription,filename from JDMaster "
                     "where isskillsextracted = 0 and jdmasterid = %s")
        else:
            id_column, text_column = 'masterid', 'description'
            query = ("select masterid,description,filename from CourseMaster "
                     "where isskillsextracted = 0 and masterid = %s")

        weightage = -1.0
        is_active = True
        # Fallback so the final lookup still works when nothing is pending.
        id_value = JdID

        conn = psycopg2.connect(**db_params)
        try:
            # The id is bound as a parameter rather than spliced into the SQL.
            df = pd.read_sql_query(query, conn, params=(int(JdID),))
            if len(df.index) > 0:
                print("Total IDs for Extractraction : " + str(len(df.index)))

            cursor = conn.cursor()
            try:
                for index, row in df.iterrows():
                    id_value = row[id_column]
                    description_value = row[text_column]
                    filename_jd = row['filename']
                    print("Extracting Skills For ", filename_jd + " , Id : " + str(id_value) + " , Index " + str(index + 1))

                    annotations = skill_extractor.annotate(description_value)
                    results = annotations['results']
                    matches = results['full_matches'] + results['ngram_scored']

                    seen_skills = set()
                    for result in matches:
                        skill_entry = skill_extractor.skills_db[result['skill_id']]
                        # Drop any parenthesised qualifier from the name.
                        skill_name = skill_entry['skill_name'].split("(")[0].strip()
                        if skill_name in seen_skills:
                            continue
                        seen_skills.add(skill_name)

                        cursor.execute(
                            "SELECT skillid FROM skillmaster WHERE skillDetails IN (%s)",
                            (skill_name,),
                        )
                        if cursor.fetchone() is not None:
                            print('Skill in DB')
                            continue

                        # Score is stored as text on a 0-100 scale.
                        skill_score = str(round(result['score'], 2) * 100)
                        cursor.execute(
                            "INSERT INTO SkillMaster (SkillDetails, SkillType, Weightage, IsActive, skill_score) "
                            "VALUES (%s, %s, %s, %s, %s) RETURNING SkillID",
                            (skill_name, skill_entry['skill_type'], weightage, is_active, skill_score),
                        )
                        conn.commit()
                        print("Skill Identified : ", skill_name)

                    print("Skills Updated for Skills Extraction for file ", filename_jd)
                    print("Total Skills : ", len(seen_skills))
            finally:
                cursor.close()
        finally:
            conn.close()
        return SkillExtractorDetails.latestSkillDetails(id_value, db_params, IsJD)

    @staticmethod
    def latestSkillDetails(jid, db_params, IsJD):
        """Return the ``display_skills`` summary for ``jid``."""
        return SkillExtractorDetails.display_skills(jid, db_params, IsJD)

    @staticmethod
    def tuple_to_int(tup):
        """Combine a tuple of digits into one number, e.g. ``(1, 2, 3)`` -> 123.

        Element ``i`` is weighted by ``10 ** (len(tup) - 1 - i)``. A one-element
        tuple returns that element unchanged.

        Raises:
            IndexError: If ``tup`` is empty.
        """
        size = len(tup)
        total = tup[-1]
        # Fold from the right so long tuples do not hit the recursion limit.
        for position in range(size - 2, -1, -1):
            total = tup[position] * (10 ** (size - 1 - position)) + total
        return total

    @staticmethod
    def skill_check(dbQuery, db_params, IsJD, params=None):
        """Run ``dbQuery`` and join its ``skillname`` column with ``", "``.

        Args:
            dbQuery: SQL returning a ``skillname`` column.
            db_params: Keyword arguments for ``psycopg2.connect``.
            IsJD: Unused; kept for interface compatibility.
            params: Optional values bound to ``%s`` placeholders in ``dbQuery``.

        Returns:
            Comma-separated skill names, or ``""`` when the query has no rows.
        """
        conn = psycopg2.connect(**db_params)
        try:
            df = pd.read_sql_query(dbQuery, conn, params=params)
        finally:
            conn.close()
        return ', '.join(df['skillname'].tolist())

    @staticmethod
    def display_skills(id, db_params, IsJD):
        """Print and return the skills of one record grouped by type and score.

        Reads from ``SkillDetails`` when ``IsJD`` is truthy, otherwise from
        ``CVSkillDetails``.

        Args:
            id: Numeric record id.
            db_params: Keyword arguments for ``psycopg2.connect``.
            IsJD: Selects the table to query.

        Returns:
            ``"<hard>@<soft>@<good-to-have soft> <good-to-have hard>"`` where
            each part is a comma-separated list of skill names.
        """
        table_name = 'SkillDetails' if IsJD else 'CVSkillDetails'
        record_id = int(id)

        def fetch(comparison, skill_type):
            # Table name and comparison come from the fixed values in this
            # method; the id and skill type are bound as parameters.
            query = (f"select skillname from {table_name} "
                     f"where id = %s and skillscore {comparison} and skilltype = %s")
            return SkillExtractorDetails.skill_check(
                query, db_params, IsJD, params=(record_id, skill_type)
            )

        # NOTE(review): scores of exactly 99 (hard) or 50 (soft) fall into no
        # bucket -- confirm whether one side should be inclusive.
        RequiredSkills_Hard = fetch("> 99", 'Hard Skill')
        RequiredSkills_Soft = fetch("> 50", 'Soft Skill')
        RequiredSkills_G1 = fetch("< 50", 'Soft Skill')
        RequiredSkills_G2 = fetch("< 99", 'Hard Skill')

        print('')
        print("Required Skills : " + RequiredSkills_Hard)
        print('')
        print("Required Soft Skills : " + RequiredSkills_Soft)
        print('')
        print("Good to have Skills : " + RequiredSkills_G1 + " " + RequiredSkills_G2)
        return RequiredSkills_Hard + "@" + RequiredSkills_Soft + "@" + RequiredSkills_G1 + " " + RequiredSkills_G2
|